def compare_variable_attributes(fdatasets, r, name_list, sDir):
    """Tabulate variable attributes per deployment file and export them to CSV.

    For every file, one row is recorded for the pressure variable reported by
    ``cf.add_pressure_to_dictionary_of_sci_vars`` and one row for each name in
    ``name_list`` (resolved through ``get_variable_data``). Rows are indexed by
    the deployment number parsed from the file name. Duplicate rows are dropped
    before the table is written to ``<sDir>/<r>_velocity_variables.csv``.

    Parameters
    ----------
    fdatasets : sequence of str
        '/'-separated dataset paths whose last component starts with
        ``deployment<number>_``.
    r : str
        Label used as the prefix of the output CSV file name.
    name_list : iterable
        Variable names forwarded one at a time to ``get_variable_data``.
    sDir : str
        Directory in which the CSV file is written.

    Returns
    -------
    pandas.DataFrame
        Columns ``var_id``, ``units``, ``long_name`` and ``fill_values``;
        empty when ``fdatasets`` is empty.
    """
    frames = []
    for fpath in fdatasets:
        fname = fpath.split('/')[-1]
        print('\n', fname)

        # 'deployment0012_...' -> 12
        deployment = int(fname.split('_')[0].split('deployment')[-1])

        # Row label. The previous '-'.join((str(deployment))) iterated over the
        # characters of the string and produced labels such as '1-2' for 12.
        dr_dp = str(deployment)

        # The context manager releases the file handle once the attributes are read.
        with xr.open_dataset(fpath, mask_and_scale=False) as ds:
            # candidate variables, excluding 'time' and '_qc_' entries
            var_list = cf.notin_list(ds.data_vars.keys(), ['time', '_qc_'])

            z_id, z_data, z_unit, z_name, z_fill = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            frames.append(pd.DataFrame({'var_id': [z_id], 'units': [z_unit[0]],
                                        'long_name': [z_name[0]], 'fill_values': [z_fill[0]]},
                                       index=[dr_dp]))

            for vname in name_list:
                vname_id, vname_unit, vname_name, vname_fv = get_variable_data(ds, var_list, vname)
                frames.append(pd.DataFrame({'var_id': [vname_id], 'units': [vname_unit],
                                            'long_name': [vname_name], 'fill_values': [str(vname_fv)]},
                                           index=[dr_dp]))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat raises on an empty list, so fall back to an empty frame.
    vars_df = pd.concat(frames) if frames else pd.DataFrame()
    vars_df = vars_df.drop_duplicates()
    vars_df.to_csv('{}/{}_velocity_variables.csv'.format(sDir, r), index=True)

    return vars_df
Exemple #2
0
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    """Create profile, cross-section and (for 'MOAS' platforms) 4D plots for every science variable.

    Reference designators are derived from ``url_list``; those containing 'ENG' or 'ADCP' are
    skipped. For each remaining designator the matching NetCDF files are collected, optionally
    restricted to the preferred streams, filtered for collocated instruments and plotted twice:
    once with all data and once after the rejection steps implemented in ``cf``.

    Parameters
    ----------
    url_list : iterable of str
        Data URLs whose second-to-last '/' component, split on '-', carries the reference
        designator in fields 1-4.
    sDir : str
        Root directory under which the plot directories are created.
    deployment_num : int or None
        When not None, only files whose deployment number (last four characters of the deployment
        string, as an int) equals this value are processed.
    start_time, end_time : datetime-like or None
        When both are given, data are restricted to this time slice and a
        '<start>to<end>' sub-directory is appended to the save paths.
    preferred_only : str
        'yes' restricts processing to the streams listed by ``cf.get_preferred_stream_info``.
    n_std : number or None
        Standard-deviation multiplier forwarded to the envelope calculation. Ignored (percentile
        is used instead) for variables whose name contains 'scatter'.
    inpercentile : number or None
        Percentile forwarded to the envelope calculation.
    zcell_size : int
        Pressure bin size used to group data by depth (also used as a ``range`` step).
    zdbar : number or None
        When truthy, only samples with 0 < pressure < zdbar are kept for the cleaned plots.
    """
    rd_list = []
    for uu in url_list:
        rd = _refdes_from_url(uu)
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        fdatasets = _select_datasets(r, url_list, preferred_only)

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            print('\n{}'.format(fd.split('/')[-1]))

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)

            # Compare by value: 'is not' tests object identity and is unreliable for ints.
            # The check runs before the file is opened so skipped deployments cost nothing.
            if deployment_num is not None and int(deployment[-4:]) != deployment_num:
                continue

            # The context manager guarantees the file handle is released for every dataset.
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                _plot_dataset(ds.swap_dims({'obs': 'time'}), r, subsite, refdes, method, stream, deployment,
                              sDir, start_time, end_time, n_std, inpercentile, zcell_size, zdbar)


# Values at or below these thresholds are discarded as unreasonable ("0.0 data") for the named
# science variables; every other variable uses a threshold of 0.
_MIN_REASONABLE_VALUE = {'salinity': 30, 'density': 1022.5, 'conductivity': 3.45}


def _refdes_from_url(url):
    """Return the reference designator (fields 1-4 of the second-to-last URL component)."""
    parts = url.split('/')[-2].split('-')
    return '-'.join((parts[1], parts[2], parts[3], parts[4]))


def _select_datasets(r, url_list, preferred_only):
    """Return the NetCDF files for reference designator ``r``, optionally preferred streams only."""
    per_url = [cf.get_nc_urls([u]) for u in url_list if _refdes_from_url(u) == r]
    datasets = list(itertools.chain.from_iterable(per_url))
    if preferred_only != 'yes':
        return datasets

    fdatasets = []
    # get the preferred stream information
    ps_df, n_streams = cf.get_preferred_stream_info(r)
    for _, row in ps_df.iterrows():
        for ii in range(n_streams):
            try:
                rms = '-'.join((r, row[ii]))
            except TypeError:
                # non-string entry: no stream in this slot for this deployment
                continue
            for dd in datasets:
                spl = dd.split('/')[-2].split('-')
                catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                fdeploy = dd.split('/')[-1].split('_')[0]
                if rms == catalog_rms and fdeploy == row['deployment']:
                    fdatasets.append(dd)
    return fdatasets


def _plot_title(deployment, refdes, method, r, times):
    """Build the plot title from the deployment metadata and the time span of ``times``."""
    t0 = pd.to_datetime(times.min()).strftime('%Y-%m-%dT%H:%M:%S')
    t1 = pd.to_datetime(times.max()).strftime('%Y-%m-%dT%H:%M:%S')
    title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
    if 'SPKIR' in r:
        title = title + '\nWavelength = 510 nm'
    return title


def _removed_data_summary(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero, n_visual):
    """Return the legend text summarising how many samples each rejection step removed."""
    return ('removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
            '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
            '\nexcluded {} suspect data points when inspected visually'.format(n_visual))


def _plot_dataset(ds, r, subsite, refdes, method, stream, deployment, sDir, start_time, end_time,
                  n_std, inpercentile, zcell_size, zdbar):
    """Plot every science variable of one open dataset (already indexed by 'time')."""
    array = subsite[0:2]
    sci_vars = cf.return_science_vars(stream)

    # Save paths: <sDir>/<array>/<subsite>/<refdes>/<plot type>/<deployment>[/<start>to<end>]
    subdirs = [deployment]
    if start_time is not None and end_time is not None:
        ds = ds.sel(time=slice(start_time, end_time))
        if len(ds['time'].values) == 0:
            print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
            return
        subdirs.append(start_time.strftime('%Y-%m-%d') + 'to' + end_time.strftime('%Y-%m-%d'))
    plot_root = os.path.join(sDir, array, subsite, refdes)
    save_dir_profile = os.path.join(plot_root, 'profile_plots', *subdirs)
    save_dir_xsection = os.path.join(plot_root, 'xsection_plots', *subdirs)
    save_dir_4d = os.path.join(plot_root, 'xsection_plots_4d', *subdirs)

    time1 = ds['time'].values
    try:
        ds_lat1 = ds['lat'].values
    except KeyError:
        ds_lat1 = None
        print('No latitude variable in file')
    try:
        ds_lon1 = ds['lon'].values
    except KeyError:
        ds_lon1 = None
        print('No longitude variable in file')

    # get pressure variable
    _pvarname, y1, y_units, press, _y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

    # NOTE: glider bathymetry (m_water_depth from the engineering stream) is not overlaid;
    # the cross-section plots are always called with t_eng=None and m_water_depth=None.

    for sv in sci_vars:
        print('')
        print(sv)
        if 'pressure' in sv:
            continue

        # Instruments with dedicated plotting routines
        if sv == 'spkir_abj_cspp_downwelling_vector':
            pxso.pf_xs_spkir(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile, save_dir_profile,
                             save_dir_xsection, deployment, press, y_units, n_std, zdbar)
            continue
        if 'OPTAA' in r:
            if sv not in ['wavelength_a', 'wavelength_c']:
                pxso.pf_xs_optaa(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile, save_dir_profile,
                                 save_dir_xsection, deployment, press, y_units, n_std, zdbar)
            continue

        z1 = ds[sv].values
        fv = ds[sv]._FillValue
        sv_units = ds[sv].units

        # Check if the array is all NaNs
        if np.isnan(z1).all():
            print('Array of all NaNs - skipping plot.')
            continue

        # Check if the array is all fill values
        if len(z1[z1 != fv]) == 0:
            print('Array of all fill values - skipping plot.')
            continue

        # remove unreasonable pressure data (e.g. for surface piercing profilers)
        if zdbar:
            po_ind = (0 < y1) & (y1 < zdbar)
            tm = time1[po_ind]
            y = y1[po_ind]
            z = z1[po_ind]
            # lat/lon are None when the file has no such variable; indexing None would raise
            ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
            ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
        else:
            tm, y, z, ds_lat, ds_lon = time1, y1, z1, ds_lat1, ds_lon1

        # reject erroneous data
        dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
            cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

        # get rid of 0.0 data
        ind = ndata > _MIN_REASONABLE_VALUE.get(sv, 0)
        lenzero = np.sum(~ind)
        dtime = dtime[ind]
        zpressure = zpressure[ind]
        ndata = ndata[ind]
        if ds_lat is not None and ds_lon is not None:
            lat = lat[ind]
            lon = lon[ind]
        else:
            lat = None
            lon = None

        # Use a per-variable copy: overwriting n_std itself would silently switch every
        # later variable and dataset to percentile mode after the first 'scatter' variable.
        sv_n_std = None if 'scatter' in sv else n_std  # None -> use percentile

        have_clean_data = False
        if len(dtime) > 0:
            # reject time range from data portal file export
            t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

            print('removed {} data points using visual inspection of data'.format(
                len(ndata) - len(z_portal)))

            # create data groups
            if len(y_portal) > 0:
                columns = ['tsec', 'dbar', str(sv)]
                min_r = int(round(np.nanmin(y_portal) - zcell_size))
                max_r = int(round(np.nanmax(y_portal) + zcell_size))
                ranges = list(range(min_r, max_r, zcell_size))

                groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                # get percentile analysis for printing on the profile plot
                y_avg, n_avg, _n_min, _n_max, n0_std, n1_std, _l_arr, _time_ex = \
                    cf.reject_timestamps_in_groups(groups, d_groups, sv_n_std, inpercentile)
                have_clean_data = True

        sname = '-'.join((r, method, sv))
        var_label = sv + " (" + sv_units + ")"
        press_label = press[0] + " (" + y_units[0] + ")"

        # ---- Plot all data ----
        if len(time1) > 0:
            cf.create_dir(save_dir_profile)
            cf.create_dir(save_dir_xsection)
            sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d')))
            title = _plot_title(deployment, refdes, method, r, time1)

            # profile plot
            fig, ax = pf.plot_profiles(z1, y1, time1, press_label, var_label, 'Time', stdev=None)
            ax.set_title(title, fontsize=9)
            fig.tight_layout()
            pf.save_fig(save_dir_profile, sfileall)

            # xsection plot
            fig, ax, _ = pf.plot_xsection(subsite, time1, y1, z1, var_label, press_label, t_eng=None,
                                          m_water_depth=None, inpercentile=None, stdev=None)
            if fig:
                ax.set_title(title, fontsize=9)
                fig.tight_layout()
                pf.save_fig(save_dir_xsection, sfileall)

        # ---- Plot cleaned-up data ----
        if not have_clean_data:
            continue

        sfile = '_'.join(('rm_erroneous_data', sname, pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
        title = _plot_title(deployment, refdes, method, r, t_portal)
        summary = _removed_data_summary(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero,
                                        len(ndata) - len(z_portal))

        # profile plot
        fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, press_label, var_label, 'Time', stdev=None)
        ax.set_title(title, fontsize=9)
        ax.plot(n_avg, y_avg, '-k')
        ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)

        # Always build the legend so it is never unbound (or stale from a previous variable)
        # when neither a percentile nor a standard-deviation envelope was requested.
        profile_legend = summary + '\n(black) data average in {} dbar segments'.format(zcell_size)
        if inpercentile:
            profile_legend += '\n(magenta) {} percentile envelope in {} dbar segments'.format(
                int(100 - inpercentile * 2), zcell_size)
        elif sv_n_std:
            profile_legend += '\n(magenta) +/- {} SD envelope in {} dbar segments'.format(
                int(sv_n_std), zcell_size)
        ax.legend((profile_legend,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
        fig.tight_layout()
        pf.save_fig(save_dir_profile, sfile)

        # xsection plot of non-erroneous data
        fig, ax, _ = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, var_label, press_label,
                                      t_eng=None, m_water_depth=None, inpercentile=None, stdev=None)
        # same guard as the all-data cross-section: skip when no figure was produced
        if fig:
            ax.set_title(title, fontsize=9)
            ax.legend((summary,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
            fig.tight_layout()
            pf.save_fig(save_dir_xsection, sfile)

        # 4D plot for gliders only
        if 'MOAS' in r and ds_lat is not None and ds_lon is not None:
            cf.create_dir(save_dir_4d)

            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
            cbar = plt.colorbar(sct, label=var_label, extend='both')
            cbar.ax.tick_params(labelsize=8)
            ax.invert_zaxis()
            ax.view_init(25, 32)
            ax.invert_xaxis()
            ax.invert_yaxis()
            ax.set_zlabel(press_label, fontsize=9)
            ax.set_ylabel('Latitude', fontsize=9)
            ax.set_xlabel('Longitude', fontsize=9)

            ax.set_title(title, fontsize=9)
            pf.save_fig(save_dir_4d, sfile)
Exemple #3
0
def append_variable_data(ds, variable_dict, common_stream_name, exclude_times):
    """Append the data of one dataset to the per-variable accumulators of a stream.

    Each raw variable of ``ds`` (as returned by ``cf.return_raw_vars``) is matched against the
    keys of ``variable_dict[common_stream_name]['vars']`` by checking whether the variable's
    ``long_name`` is contained in the key; the first matching key is used. A variable is only
    appended when its units equal the entry's ``'db_units'``. The entry's ``'t'``,
    ``'pressure'``, ``'values'`` and ``'deployments'`` arrays are extended in place; new fill
    values and units are recorded in ``'fv'`` and ``'units'``.

    One-dimensional variables are appended directly. For variables with more dimensions,
    ``'values'`` becomes a dict of arrays keyed by row index of the transposed data, and time,
    pressure and deployment are appended only once (for index 0).

    When ``exclude_times`` is non-empty, every entry is passed to ``exclude_time_ranges`` and
    nothing is appended if no timestamps remain.

    Variables lacking an attribute that is read here (an ``AttributeError`` anywhere while the
    variable is processed) are skipped.

    Returns
    -------
    tuple
        ``(variable_dict, pressure_unit, pressure_name)`` where the two lists hold the distinct
        pressure units and names reported by ``cf.add_pressure_to_dictionary_of_sci_vars``.
    """

    def grow(record, key, chunk):
        # extend record[key] with chunk (np.append returns a new array)
        record[key] = np.append(record[key], chunk)

    pressure_unit = []
    pressure_name = []
    candidates = cf.return_raw_vars(list(ds.data_vars.keys()) + list(ds.coords))
    vars_dict = variable_dict[common_stream_name]['vars']

    print('\nPARAMETERS: ')
    for var in candidates:
        try:
            var_long_name = ds[var].long_name
            matches = [key for key in list(vars_dict.keys()) if var_long_name in key]
            if len(matches) == 0:
                continue

            long_name = matches[0]
            if not (ds[var].units == vars_dict[long_name]['db_units']):
                continue
            record = vars_dict[long_name]

            print('______', long_name)
            if ds[var]._FillValue not in record['fv']:
                record['fv'].append(ds[var]._FillValue)
            if ds[var].units not in record['units']:
                record['units'].append(ds[var].units)

            times = ds['time'].values
            data = ds[var].values
            deployments = ds['deployment'].values

            # find the pressure to use from the data file
            pvar, pressure, p_unit, p_name, p_fv = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            if p_unit not in pressure_unit:
                pressure_unit.append(p_unit)
            if p_name not in pressure_name:
                pressure_name.append(p_name)

            if len(ds[var].dims) == 1:
                filtering = len(exclude_times) > 0
                if filtering:
                    for et in exclude_times:
                        times, pressure, data, deployments = exclude_time_ranges(
                            times, pressure, data, deployments, et)
                # without exclusions data are always appended; with exclusions only
                # when something is left
                if not filtering or len(times) > 0:
                    grow(record, 't', times)
                    grow(record, 'pressure', pressure)
                    grow(record, 'values', data)
                    grow(record, 'deployments', deployments)
                continue

            # appending 2D datasets: 'values' holds one array per row of the transposed data
            if type(record['values']) is not dict:
                del record['values']
                record['values'] = dict()
            rows = data.T

            # for presf_wave_burst data, telemetered and recovered_host pressure data have a
            # matrix of 20, while recovered_inst data have a matrix of 1024. for DCL data,
            # whatever is above 20 will be an array of nans as placeholders (so the indices
            # match between DCL and recovered_inst)
            n_rows = 1024 if common_stream_name == 'presf_abc_wave_burst' else len(rows)

            for i in range(n_rows):
                # start every row from the unfiltered time/deployment/pressure arrays
                row_times = ds['time'].values
                row_deployments = ds['deployment'].values
                row_pressure = pressure

                if i not in record['values']:
                    record['values'][i] = np.array([])

                try:
                    row_data = rows[i]
                except IndexError:
                    # placeholder row of NaNs for indices beyond the data
                    row_data = np.full(np.shape(row_times), np.nan)

                filtering = len(exclude_times) > 0
                if filtering:
                    for et in exclude_times:
                        row_times, row_pressure, row_data, row_deployments = exclude_time_ranges(
                            row_times, row_pressure, row_data, row_deployments, et)
                    if not len(row_times) > 0:
                        continue

                if i == 0:
                    # time, pressure and deployment are shared by all rows: append once
                    grow(record, 't', row_times)
                    grow(record, 'pressure', row_pressure)
                    grow(record['values'], i, row_data)
                    grow(record, 'deployments', row_deployments)
                else:
                    grow(record['values'], i, row_data)
        except AttributeError:
            continue

    return variable_dict, pressure_unit, pressure_name
Exemple #4
0
def append_evaluated_data(sDir, deployment, ds, variable_dict,
                          common_stream_name, zdbar):
    """Append one dataset's screened data to the shared variable dictionary.

    Every raw variable of ``ds`` whose ``long_name`` is contained in a key of
    ``variable_dict[common_stream_name]['vars']`` and whose units equal that
    entry's ``'db_units'`` is passed through the rejection helpers
    (erroneous values, data-portal time ranges, stat-analysis timestamps and
    depth range) and then appended to the entry's ``'t'``, ``'pressure'``,
    ``'values'`` and ``'deployments'`` arrays.  New fill values and units are
    recorded in the entry's ``'fv'`` and ``'units'`` lists.

    Args:
        sDir: root directory; stat-analysis exclusion files are looked up in
            ``<sDir>/<subsite[0:2]>/<subsite>/<refdes>/time_to_exclude``.
        deployment: deployment identifier forwarded to
            ``reject_timestamps_from_stat_analysis``.
        ds: opened dataset exposing ``subsite``, ``node`` and ``sensor``
            attributes and ``time`` / ``deployment`` variables.
        variable_dict: dictionary keyed by stream name; it is updated in
            place and also returned.
        common_stream_name: key of ``variable_dict`` whose ``'vars'`` mapping
            receives the data.
        zdbar: threshold forwarded to ``reject_data_in_depth_range``
            (reported in dbar by the log message).

    Returns:
        tuple: ``(variable_dict, pressure_unit, pressure_name, total_len)``
        where ``pressure_unit`` / ``pressure_name`` list the distinct pressure
        units and names encountered, and ``total_len`` is the sum of the
        pre-rejection record counts of the variables that were appended.

    Any ``AttributeError`` raised while handling a variable (for example a
    missing ``long_name``, ``units`` or ``_FillValue`` attribute) causes that
    variable to be skipped.
    """
    pressure_unit, pressure_name = [], []
    r = '{}-{}-{}'.format(ds.subsite, ds.node, ds.sensor)
    ds_vars = cf.return_raw_vars(list(ds.data_vars.keys()) + list(ds.coords))
    vars_dict = variable_dict[common_stream_name]['vars']
    total_len = 0
    for var in ds_vars:
        try:
            long_name = ds[var].long_name
            matches = [key for key in vars_dict if long_name in key]
            if not matches:
                continue
            long_name = matches[0]
            entry = vars_dict[long_name]
            if ds[var].units != entry['db_units']:
                # Units differ from the reference: nothing is appended, so
                # nothing may be counted in total_len either.
                continue

            print('\n' + var)
            if ds[var]._FillValue not in entry['fv']:
                entry['fv'].append(ds[var]._FillValue)
            if ds[var].units not in entry['units']:
                entry['units'].append(ds[var].units)
            tD = ds['time'].values
            varD = ds[var].values
            deployD = ds['deployment'].values

            # find the pressure to use from the data file
            pvarname, pD, p_unit, p_name, p_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(
                ds)
            if p_unit not in pressure_unit:
                pressure_unit.append(p_unit)
            if p_name not in pressure_name:
                pressure_name.append(p_name)

            l0 = len(tD)
            # reject erroneous data
            tD, pD, varD, deployD = reject_erroneous_data(
                r, var, tD, pD, varD, deployD, ds[var]._FillValue)
            l_erroneous = len(tD)
            print('{} erroneous data'.format(l0 - l_erroneous))

            if l_erroneous != 0:
                # reject time range from data portal file export
                tD, pD, varD, deployD = reject_timestamps_data_portal(
                    ds.subsite, r, tD, pD, varD, deployD)
                l_portal = len(tD)
                print('{} suspect  - data portal'.format(l_erroneous -
                                                         l_portal))

                if l_portal != 0:
                    # reject timestamps from stat analysis
                    Dpath = '{}/{}/{}/{}/{}'.format(
                        sDir, ds.subsite[0:2], ds.subsite, r,
                        'time_to_exclude')
                    tD, pD, varD, deployD = reject_timestamps_from_stat_analysis(
                        Dpath, deployment, var, tD, pD, varD, deployD)
                    l_stat = len(tD)
                    print('{} suspect  - stat analysis'.format(l_portal -
                                                               l_stat))

                    # reject timestamps in a depth range
                    tD, pD, varD, deployD = reject_data_in_depth_range(
                        tD, pD, varD, deployD, zdbar)
                    l_zrange = len(tD)
                    print('{} suspect - water depth > {} dbar'.format(
                        l_stat - l_zrange, zdbar))
                else:
                    print('suspect data - rejected all, see data portal')
            else:
                print('erroneous data - rejected all')

            entry['t'] = np.append(entry['t'], tD)
            entry['pressure'] = np.append(entry['pressure'], pD)
            entry['values'] = np.append(entry['values'], varD)
            entry['deployments'] = np.append(entry['deployments'], deployD)
            # Count only variables that were actually processed; l0 is
            # defined exclusively on this path.
            total_len += l0

        except AttributeError:
            continue

    return variable_dict, pressure_unit, pressure_name, total_len
# Example #5
# 0
def _deployment_number(deployment):
    """Return the integer deployment number of a label such as 'deployment0010'."""
    return int(deployment.split('deployment')[-1])


def _glider_bathymetry(r, url_list, method, deployment):
    """Return ``(t_eng, m_water_depth)`` for coastal gliders, else ``(None, None)``.

    Only CE05MOAS / CP05MOAS reference designators are considered, and only
    when exactly one engineering URL matches.  Readings are kept where
    ``m_altimeter_status`` equals 0.
    """
    if 'CE05MOAS' not in r and 'CP05MOAS' not in r:
        return None, None

    eng = '-'.join((r.split('-')[0], r.split('-')[1],
                    '00-ENG000000', method, 'glider_eng'))
    eng_url = [s for s in url_list if eng in s]
    if len(eng_url) != 1:
        return None, None

    eng_datasets = cf.get_nc_urls(eng_url)
    # filter out collocated datasets
    eng_dataset = [
        j for j in eng_datasets
        if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])
    ]
    if len(eng_dataset) == 0:
        print('No engineering file for deployment {}'.format(deployment))
        return None, None

    ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
    t_eng = ds_eng['time'].values
    m_water_depth = ds_eng['m_water_depth'].values

    # m_altimeter_status = 0 means a good reading (not nan or -1)
    eng_ind = ds_eng['m_altimeter_status'].values == 0
    return t_eng[eng_ind], m_water_depth[eng_ind]


def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         n_std, surface_params, depth_params):
    """Create profile, cross-section and (for gliders) 4D plots per science variable.

    For every non-engineering reference designator found in ``url_list`` the
    matching NetCDF files are opened and, for each science variable, two sets
    of figures are saved under ``sDir``: one with all data and one with
    erroneous / suspect data removed, annotated with the median and
    percentile envelope computed in depth groups.

    Args:
        url_list: list of URL strings.  The second-to-last '/'-separated
            component of each URL is split on '-' and its elements 1-4 form
            the reference designator.
        sDir: root directory for the saved figures.
        deployment_num: integer deployment number to plot, or None to plot
            every deployment.
        start_time: start of the time range (object with ``strftime``) or
            None.  The range is applied only when both bounds are given.
        end_time: end of the time range (object with ``strftime``) or None.
        preferred_only: ``'yes'`` to restrict the files to the preferred
            streams returned by ``cf.get_preferred_stream_info``.
        n_std: accepted for interface compatibility; its value is not used
            (see the NOTE(review) next to ``group_std`` below).
        surface_params: ``[start, stop, step, percentile]`` for the surface
            depth groups (``range(start, stop, step)``).
        depth_params: ``[start, stop, step, percentile]`` for the deeper
            groups (``range(start, stop + step, step)``).

    Returns:
        None.
    """
    # Unique reference designators, skipping engineering streams.
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.append(cf.get_nc_urls([u]))
        datasets = list(itertools.chain(*datasets))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string cell: no stream listed in this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # for coastal gliders, get m_water_depth for bathymetry
            t_eng, m_water_depth = _glider_bathymetry(r, url_list, method,
                                                      deployment)

            # Compare by value: identity comparison of ints is unreliable, and
            # splitting on '0' mis-parses numbers such as 10 or 101.
            if deployment_num is not None:
                if _deployment_number(deployment) != deployment_num:
                    continue

            base_dir = (sDir, array, subsite, refdes)
            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print(
                        'No data to plot for specified time range: ({} to {})'.
                        format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                sub_dir = (deployment, ext)
            else:
                sub_dir = (deployment,)

            save_dir_profile = os.path.join(*base_dir, 'profile_plots',
                                            *sub_dir)
            save_dir_xsection = os.path.join(*base_dir, 'xsection_plots',
                                             *sub_dir)
            save_dir_4d = os.path.join(*base_dir, 'xsection_plots_4d',
                                       *sub_dir)

            tm = ds['time'].values
            try:
                ds_lat = ds['lat'].values
            except KeyError:
                ds_lat = None
                print('No latitude variable in file')
            try:
                ds_lon = ds['lon'].values
            except KeyError:
                ds_lon = None
                print('No longitude variable in file')

            # get pressure variable
            y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print(sv)
                if 'pressure' in sv:
                    continue

                z = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z)) == len(z):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 data
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                # min()/max() below raise ValueError on an empty array, so
                # skip the variable when every point has been rejected.
                if len(dtime) == 0:
                    print('No data left after removing erroneous data - '
                          'skipping plot.')
                    continue

                t0 = pd.to_datetime(
                    dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(
                    dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes,
                                  method)) + '\n' + t0 + ' to ' + t1

                # reject time range from data portal file export
                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                n_visual = len(ndata) - len(z_portal)
                print(
                    'removed {} data points using visual inspection of data'
                    .format(n_visual))

                # create data groups
                columns = ['tsec', 'dbar', str(sv)]
                range1 = list(
                    range(surface_params[0], surface_params[1],
                          surface_params[2]))
                range2 = list(
                    range(depth_params[0],
                          depth_params[1] + depth_params[2],
                          depth_params[2]))
                ranges = range1 + range2

                groups, d_groups = gt.group_by_depth_range(
                    t_portal, y_portal, z_portal, columns, ranges)

                # get percentile analysis for printing on the profile plot
                inpercentile = [surface_params[3]] * len(
                    range1) + [depth_params[3]] * len(range2)
                # NOTE(review): the per-group "n_std" list is built from the
                # same percentile values, which means the n_std argument of
                # main() never reaches the analysis - confirm this is intended.
                group_std = [surface_params[3]] * len(
                    range1) + [depth_params[3]] * len(range2)
                y_plt, n_med, n_min, n_max, n0_std, n1_std, l_arr, time_ex = reject_timestamps_in_groups(
                    groups, d_groups, group_std, inpercentile)

                sname = '-'.join((r, method, sv))
                var_label = sv + " (" + sv_units + ")"
                press_label = press[0] + " (" + y_units[0] + ")"

                # Legend fragments shared by the cleaned-data figures.
                removed_txt = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} zeros'.format(lenfv, lennan, lenev, lengr,
                                      global_min, global_max, lenzero))
                excluded_txt = (
                    '\nexcluded {} suspect data points when inspected visually'
                    .format(n_visual))

                """
                Plot all data
                """
                if len(tm) > 0:
                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sfileall = '_'.join(('all_data', sname))
                    '''
                    profile plot
                    '''
                    fig, ax = pf.plot_profiles(z,
                                               y,
                                               tm,
                                               press_label,
                                               var_label,
                                               'Time',
                                               stdev=None)

                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfileall)
                    '''
                    xsection plot
                    '''
                    fig, ax, bar = pf.plot_xsection(subsite,
                                                    tm,
                                                    y,
                                                    z,
                                                    var_label,
                                                    press_label,
                                                    t_eng,
                                                    m_water_depth,
                                                    inpercentile=None,
                                                    stdev=None)

                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfileall)

                """
                Plot cleaned-up data (dtime is guaranteed non-empty here)
                """
                sfile = '_'.join(('rm_erroneous_data', sname))
                '''
                profile plot
                '''
                fig, ax = pf.plot_profiles(z_portal,
                                           y_portal,
                                           t_portal,
                                           press_label,
                                           var_label,
                                           'Time',
                                           stdev=None)

                ax.set_title(title, fontsize=9)
                ax.plot(n_med, y_plt, '.k')
                ax.fill_betweenx(y_plt,
                                 n0_std,
                                 n1_std,
                                 color='m',
                                 alpha=0.2)
                leg_text = (
                    removed_txt + excluded_txt +
                    '\n(black) data median in {} dbar segments (break at {} dbar)'
                    .format([surface_params[2], depth_params[2]],
                            depth_params[0]) +
                    '\n(magenta) upper and lower {} percentile envelope in {} dbar segments'
                    .format([surface_params[3], depth_params[3]],
                            [surface_params[2], depth_params[2]]), )
                ax.legend(leg_text,
                          loc='upper center',
                          bbox_to_anchor=(0.5, -0.17),
                          fontsize=6)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sfile)
                '''
                xsection plot
                '''
                # plot non-erroneous data
                fig, ax, bar = pf.plot_xsection(subsite,
                                                t_portal,
                                                y_portal,
                                                z_portal,
                                                var_label,
                                                press_label,
                                                t_eng,
                                                m_water_depth,
                                                inpercentile=None,
                                                stdev=None)

                ax.set_title(title, fontsize=9)
                leg_text = (removed_txt + excluded_txt, )
                ax.legend(leg_text,
                          loc='upper center',
                          bbox_to_anchor=(0.5, -0.17),
                          fontsize=6)
                fig.tight_layout()
                pf.save_fig(save_dir_xsection, sfile)
                '''
                4D plot for gliders only
                '''
                if 'MOAS' in r and ds_lat is not None and ds_lon is not None:
                    cf.create_dir(save_dir_4d)

                    fig = plt.figure()
                    ax = fig.add_subplot(111, projection='3d')
                    sct = ax.scatter(lon_portal,
                                     lat_portal,
                                     y_portal,
                                     c=z_portal,
                                     s=2)
                    cbar = plt.colorbar(sct,
                                        label=var_label,
                                        extend='both')
                    cbar.ax.tick_params(labelsize=8)
                    ax.invert_zaxis()
                    ax.view_init(25, 32)
                    ax.invert_xaxis()
                    ax.invert_yaxis()
                    ax.set_zlabel(press_label, fontsize=9)
                    ax.set_ylabel('Latitude', fontsize=9)
                    ax.set_xlabel('Longitude', fontsize=9)

                    ax.set_title(title, fontsize=9)
                    pf.save_fig(save_dir_4d, sfile)
def plot_velocity_variables(r, fdatasets, num_plots, save_dir):

    fig, ax = plt.subplots(nrows=num_plots, ncols=1, sharey=True)
    fig.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig_file = 'calculated_currents_plot'

    fig0, ax0 = plt.subplots(nrows=num_plots, ncols=1, sharey=True)
    fig0.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig0_file = 'uvw_plots'

    fig1, ax1 = plt.subplots(nrows=num_plots, ncols=1, sharey=True)
    fig1.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig1_file = 'pressure_plots'

    fig2, ax2 = plt.subplots(nrows=num_plots, ncols=1, sharey=True)
    fig2.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig2_file = 'roll_plots'

    fig3, ax3 = plt.subplots(nrows=num_plots, ncols=1, sharey=True)
    fig3.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    fig3_file = 'pitch_plots'

    for ii in range(len(fdatasets)):

        if num_plots > len(fdatasets):
            for jj in range(len(fdatasets), num_plots, 1):
                ax[jj].axis('off')
                ax0[jj].axis('off')
                # ax0[jj].axis('tight')
                ax1[jj].axis('off')
                ax2[jj].axis('off')
                ax3[jj].axis('off')

        print('\n', fdatasets[ii].split('/')[-1])
        deployment = fdatasets[ii].split('/')[-1].split('_')[0].split(
            'deployment')[-1]
        deployment = int(deployment)

        ds = xr.open_dataset(fdatasets[ii], mask_and_scale=False)
        time = ds['time'].values
        '''
        variable list
        '''
        var_list = cf.notin_list(ds.data_vars.keys(), ['time', '_qc_'])

        z_name, z_data, z_unit, z_name, z_fill = cf.add_pressure_to_dictionary_of_sci_vars(
            ds)
        z_data, err_count_z = reject_err_data_1_dims(z_data,
                                                     z_fill[0],
                                                     r,
                                                     z_name[0],
                                                     n=5)

        if 'VELPT' in r:
            w_id, w_data, w_unit, w_name, w_fill = get_variable_data(
                ds, var_list, 'upward_velocity')
            w_data, err_count_w = reject_err_data_1_dims(w_data,
                                                         w_fill,
                                                         r,
                                                         w_id,
                                                         n=5)

            u_id, u_data, u_unit, u_name, u_fill = get_variable_data(
                ds, var_list, 'eastward_velocity')
            u_data, err_count_u = reject_err_data_1_dims(u_data,
                                                         u_fill,
                                                         r,
                                                         u_id,
                                                         n=5)

            v_id, v_data, v_unit, v_name, v_fill = get_variable_data(
                ds, var_list, 'northward_velocity')
            v_data, err_count_v = reject_err_data_1_dims(v_data,
                                                         v_fill,
                                                         r,
                                                         v_id,
                                                         n=5)

            roll_id, roll_data, roll_unit, roll_name, roll_fill = get_variable_data(
                ds, var_list, 'roll')
            roll_data, err_count_roll = reject_err_data_1_dims(roll_data,
                                                               roll_fill,
                                                               r,
                                                               roll_id,
                                                               n=5)

            pitch_id, pitch_data, pitch_unit, pitch_name, pitch_fill = get_variable_data(
                ds, var_list, 'pitch')
            pitch_data, err_count_pitch = reject_err_data_1_dims(pitch_data,
                                                                 pitch_fill,
                                                                 r,
                                                                 pitch_id,
                                                                 n=5)
            '''
            According to VELPT manufacturer, data are suspect when the instrument is tilted more than 20 degrees
            redmine ticket # 12960
            '''

            tilt_ind = np.logical_or(
                abs(pitch_data) > 200,
                abs(roll_data) > 200)
            percent_good = (
                (len(time) - len(time[tilt_ind])) / len(time)) * 100

        elif 'VEL3D' in r:
            w_id, w_data, w_unit, w_name, w_fill = get_variable_data(
                ds, var_list, 'upward_turbulent_velocity')
            w_data, err_count_w = reject_err_data_1_dims(w_data,
                                                         w_fill,
                                                         r,
                                                         w_id,
                                                         n=5)

            u_id, u_data, u_unit, u_name, u_fill = get_variable_data(
                ds, var_list, 'eastward_turbulent_velocity')
            u_data, err_count_u = reject_err_data_1_dims(u_data,
                                                         u_fill,
                                                         r,
                                                         u_id,
                                                         n=5)

            v_id, v_data, v_unit, v_name, v_fill = get_variable_data(
                ds, var_list, 'northward_turbulent_velocity')
            v_data, err_count_v = reject_err_data_1_dims(v_data,
                                                         v_fill,
                                                         r,
                                                         v_id,
                                                         n=5)
        '''
        Plot pressure
        '''
        ax1[ii].plot(time,
                     z_data,
                     'b.',
                     linestyle='None',
                     marker='.',
                     markersize=0.5)  #linestyle='--', linewidth=.6
        if 'VELPT' in r:
            ax1[ii].plot(time[tilt_ind],
                         z_data[tilt_ind],
                         'r.',
                         linestyle='None',
                         marker='.',
                         markersize=0.5,
                         label=str(round(100 - percent_good, 2)) + '%')
        prepare_axis(r,
                     time,
                     deployment,
                     ax1[ii],
                     ii,
                     len(fdatasets),
                     z_name[0],
                     z_unit[0],
                     err_count=err_count_z)

        fig1_file = fig1_file + str(deployment)

        if 'VELPT' in r:
            '''
            plot roll
            '''
            ax2[ii].plot(time,
                         roll_data,
                         'b.',
                         linestyle='None',
                         marker='.',
                         markersize=0.5)
            ax2[ii].plot(time[tilt_ind],
                         roll_data[tilt_ind],
                         'r.',
                         linestyle='None',
                         marker='.',
                         markersize=0.5,
                         label=str(round(100 - percent_good, 2)) + '%')
            prepare_axis(r,
                         time,
                         deployment,
                         ax2[ii],
                         ii,
                         len(fdatasets),
                         roll_name,
                         roll_unit,
                         err_count=err_count_roll)

            fig2_file = fig2_file + str(deployment)
            '''
            plot pitch
            '''
            ax3[ii].plot(time,
                         pitch_data,
                         'b.',
                         linestyle='None',
                         marker='.',
                         markersize=0.5)
            ax3[ii].plot(time[tilt_ind],
                         pitch_data[tilt_ind],
                         'r.',
                         linestyle='None',
                         marker='.',
                         markersize=0.5,
                         label=str(round(100 - percent_good, 2)) + '%')
            prepare_axis(r,
                         time,
                         deployment,
                         ax3[ii],
                         ii,
                         len(fdatasets),
                         pitch_name,
                         pitch_unit,
                         err_count=err_count_pitch)

            fig3_file = fig3_file + str(deployment)
        '''
        1D Quiver plot
        '''
        ax[ii].quiver(time,
                      0,
                      u_data,
                      v_data,
                      color='b',
                      units='y',
                      scale_units='y',
                      scale=1,
                      headlength=1,
                      headaxislength=1,
                      width=0.004,
                      alpha=0.5)

        if 'VELPT' in r:
            ax[ii].quiver(time[tilt_ind],
                          0,
                          u_data[tilt_ind],
                          v_data[tilt_ind],
                          color='r',
                          units='y',
                          scale_units='y',
                          scale=1,
                          headlength=1,
                          headaxislength=1,
                          width=0.004,
                          alpha=0.5,
                          label=str(round(100 - percent_good, 2)) + '%')

        uv_magnitude = np.sqrt(u_data**2 + v_data**2)
        uv_maxmag = np.nanmax(uv_magnitude)

        ax[ii].set_ylim(-uv_maxmag, uv_maxmag)
        prepare_axis(r,
                     time,
                     deployment,
                     ax[ii],
                     ii,
                     len(fdatasets),
                     'Current Velocity',
                     u_unit,
                     err_count=None)

        fig_file = fig_file + str(deployment)
        '''
        Plot u and v components
        '''
        ax0[ii].plot(time,
                     v_data,
                     'b.',
                     linestyle='None',
                     marker='.',
                     markersize=0.5,
                     label='V')
        if 'VELPT' in r:
            ax0[ii].plot(time[tilt_ind],
                         v_data[tilt_ind],
                         'r',
                         linestyle='None',
                         marker='.',
                         markersize=0.5,
                         label=str(round(100 - percent_good, 2)) + '%')
        ax0[ii].plot(time,
                     u_data,
                     'g.',
                     linestyle='None',
                     marker='.',
                     markersize=0.5,
                     label='U')
        if 'VELPT' in r:
            ax0[ii].plot(time[tilt_ind],
                         u_data[tilt_ind],
                         'y',
                         linestyle='None',
                         marker='.',
                         markersize=0.5,
                         label=str(round(100 - percent_good, 2)) + '%')
        ax0[ii].plot(time,
                     w_data,
                     'm.',
                     linestyle='None',
                     marker='.',
                     markersize=0.5,
                     label='W')
        if 'VELPT' in r:
            ax0[ii].plot(time[tilt_ind],
                         w_data[tilt_ind],
                         'c',
                         linestyle='None',
                         marker='.',
                         markersize=0.5,
                         label=str(round(100 - percent_good, 2)) + '%')

        prepare_axis(r,
                     time,
                     deployment,
                     ax0[ii],
                     ii,
                     len(fdatasets),
                     'Velocity Components',
                     u_unit,
                     err_count=None)

        fig0_file = fig0_file + str(deployment)

    save_file = os.path.join(save_dir, fig1_file)
    fig1.savefig(str(save_file), dpi=150, bbox_inches='tight')

    save_file = os.path.join(save_dir, fig_file)
    fig.savefig(str(save_file), dpi=150, bbox_inches='tight')

    save_file = os.path.join(save_dir, fig0_file)
    fig0.savefig(str(save_file), dpi=150, bbox_inches='tight')

    save_file = os.path.join(save_dir, fig2_file)
    fig2.savefig(str(save_file), dpi=150, bbox_inches='tight')

    save_file = os.path.join(save_dir, fig3_file)
    fig3.savefig(str(save_file), dpi=150, bbox_inches='tight')

    plt.close('all')
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, preferred_only, glider, zdbar, n_std, inpercentile, zcell_size):
    """Plot cross-sections of each science variable, before and after removing suspect data.

    For every reference designator found in ``url_list`` (engineering 'ENG'
    designators are ignored), each selected dataset is opened and, for each
    science variable, two figures are saved:

    * ``rm_erroneous_data_<name>``: data left after ``cf.reject_erroneous_data``
      and removal of non-positive values.
    * ``rm_suspect_data_<name>``: data left after the additional percentile,
      data-portal and ``zdbar`` rejections (only written when those steps
      actually removed something).

    :param url_list: URLs whose second-to-last path segment is a '-'-separated
        name; elements 1-4 of that name form the reference designator.
    :param sDir: root directory under which the figures are saved.
    :param plot_type: name of the sub-directory inserted in the save path.
    :param deployment_num: if not None, only the deployment with this number
        is plotted.
    :param start_time: start of the time slice (needs ``strftime``); the slice
        is applied only when ``end_time`` is also given.
    :param end_time: end of the time slice (needs ``strftime``).
    :param preferred_only: 'yes' restricts the datasets to the preferred
        streams returned by ``cf.get_preferred_stream_info``.
    :param glider: 'no' disables the bathymetry overlay; 'yes' additionally
        trims it to the time range of the cleaned data.
    :param zdbar: if truthy, points whose ``y_portal`` value is not below this
        threshold are dropped before the second plot.
    :param n_std: forwarded to ``cf.reject_timestamps_in_groups``.
    :param inpercentile: forwarded to ``cf.reject_timestamps_in_groups`` and
        ``pf.plot_xsection``.
    :param zcell_size: step of the pressure bins; passed to ``range`` so it
        must be an integer.
    :return: None
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))

        # collect every netCDF url that belongs to this reference designator
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string entry in the table: nothing to match for this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print(part_d)
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # Reset the bathymetry for every dataset so a missing engineering
            # file can neither raise NameError nor reuse a previous deployment's data.
            t_eng = None
            m_water_depth = None

            if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
                eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
                eng_url = [s for s in url_list if eng in s]
                if len(eng_url) == 1:
                    eng_datasets = cf.get_nc_urls(eng_url)
                    # filter out collocated datasets
                    eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
                    if len(eng_dataset) > 0:
                        ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
                        t_eng = ds_eng['time'].values
                        m_water_depth = ds_eng['m_water_depth'].values

                        # m_altimeter_status = 0 means a good reading (not nan or -1)
                        eng_ind = ds_eng['m_altimeter_status'].values == 0
                        m_water_depth = m_water_depth[eng_ind]
                        t_eng = t_eng[eng_ind]
                    else:
                        print('No engineering file for deployment {}'.format(deployment))

            if deployment_num is not None:
                # Parse the trailing 4 digits (same convention as the sibling scripts in
                # this file): splitting on '0' yields '' for names such as
                # 'deployment0010'. Compare by value, not identity.
                if int(deployment[-4:]) != deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment, ext)
            else:
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment)

            cf.create_dir(save_dir)

            tm = ds['time'].values

            # get pressure variable
            y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            print(y_units, press)

            for sv in sci_vars:
                print(sv)
                if 'sci_water_pressure' in sv:
                    continue

                z = ds[sv].values
                fv = ds[sv]._FillValue
                z_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z)) == len(z):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                """
                clean up data
                """
                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv)

                # get rid of 0.0 data: CTDs are filtered on pressure, everything else on the data value
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]

                # min()/max() below raise on an empty array, so stop here when nothing is left
                if len(dtime) == 0:
                    print('No data left after removing erroneous values - skipping plot.')
                    continue

                # creating data groups
                columns = ['tsec', 'dbar', str(sv)]
                min_r = int(round(min(zpressure) - zcell_size))
                max_r = int(round(max(zpressure) + zcell_size))
                ranges = list(range(min_r, max_r, zcell_size))

                groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges)

                #  rejecting timestamps from percentile analysis
                y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                    groups, d_groups, n_std, inpercentile)

                t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(dtime, zpressure, ndata, time_ex)

                n_percentile = len(zpressure) - len(z_nospct)
                print('removed {} data points using {} percentile of data grouped in {} dbar segments'.format(
                    n_percentile, inpercentile, zcell_size))

                # reject time range from data portal file export
                # NOTE(review): the helper is called with (.., y_nospct, z_nospct) but unpacked
                # as (z_portal, y_portal) - confirm the argument/return order in cf.
                t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r,
                                                                              t_nospct, y_nospct, z_nospct)
                n_visual = len(z_nospct) - len(z_portal)
                print('removed {} data points using visual inspection of data'.format(n_visual))

                # reject data in a depth range
                if zdbar:
                    y_ind = y_portal < zdbar
                    n_zdbar = np.sum(~y_ind)
                    t_array = t_portal[y_ind]
                    y_array = y_portal[y_ind]
                    z_array = z_portal[y_ind]
                else:
                    n_zdbar = 0
                    t_array = t_portal
                    y_array = y_portal
                    z_array = z_portal
                print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))

                """
                Plot data
                """
                sname = '-'.join((r, method, sv))

                clabel = sv + " (" + z_units + ")"
                ylabel = press[0] + " (" + y_units[0] + ")"

                # Work on per-variable copies of the bathymetry so that trimming it for
                # one science variable does not truncate it for the next one.
                if glider == 'no':
                    t_bathy = None
                    depth_bathy = None
                else:
                    t_bathy = t_eng
                    depth_bathy = m_water_depth

                # plot non-erroneous data
                fig, ax, bar = pf.plot_xsection(subsite, dtime, zpressure, ndata, clabel, ylabel,
                                                t_bathy, depth_bathy, inpercentile, stdev=None)

                t0 = pd.to_datetime(dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                ax.set_title(title, fontsize=9)
                erroneous_text = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                    '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                )
                # legend() expects a sequence of labels, hence the 1-tuple
                ax.legend((erroneous_text,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()
                sfile = '_'.join(('rm_erroneous_data', sname))
                pf.save_fig(save_dir, sfile)

                # plots removing all suspect data: only when something was actually removed
                if len(t_array) == 0 or len(t_array) == len(dtime):
                    continue

                # plot bathymetry only within data time ranges
                if glider == 'yes' and t_bathy is not None:
                    eng_ind = (t_bathy >= np.min(t_array)) & (t_bathy <= np.max(t_array))
                    t_bathy = t_bathy[eng_ind]
                    depth_bathy = depth_bathy[eng_ind]

                fig, ax, bar = pf.plot_xsection(subsite, t_array, y_array, z_array, clabel, ylabel,
                                                t_bathy, depth_bathy, inpercentile, stdev=None)

                t0 = pd.to_datetime(t_array.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(t_array.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                ax.set_title(title, fontsize=9)
                suspect_text = (
                    erroneous_text
                    + '\nremoved {} in the upper and lower {}th percentile of data grouped in {} dbar segments'.format(
                        n_percentile, inpercentile, zcell_size)
                    + '\nexcluded {} suspect data points when inspected visually'.format(n_visual)
                )
                if zdbar:
                    suspect_text += '\nexcluded {} suspect data in water depth greater than {} dbar'.format(
                        n_zdbar, zdbar)
                ax.legend((suspect_text,), loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                fig.tight_layout()

                sfile = '_'.join(('rm_suspect_data', sname))
                pf.save_fig(save_dir, sfile)
Exemple #8
0
def main(url_list, sDir, stime, etime):
    """Overlay profile plots (variable vs. pressure) for two reference designators.

    For every deployment found in the first reference designator, the first
    matching file of each designator is opened and, for each science variable
    that is not a pressure variable, both datasets are scattered on one figure
    which is saved under ``<sDir>/<array>/<subsite>/<inst>/profile_plots/<deployment>``.

    :param url_list: exactly two URLs; characters 20-46 of the second-to-last
        path segment are taken as the reference designator.
    :param sDir: root directory under which the figures are saved.
    :param stime: start of the time slice; applied only when ``etime`` is also given.
    :param etime: end of the time slice.
    :return: None
    """
    if len(url_list) != 2:
        print('Please provide 2 reference designators for plotting')
        return

    uu0 = url_list[0]
    uu1 = url_list[1]
    rd0 = uu0.split('/')[-2][20:47]
    rd1 = uu1.split('/')[-2][20:47]
    array = rd0[0:2]
    inst = rd0.split('-')[-1]

    datasets0 = list(cf.get_nc_urls([uu0]))
    datasets1 = list(cf.get_nc_urls([uu1]))

    main_sensor0 = rd0.split('-')[-1]
    main_sensor1 = rd1.split('-')[-1]
    fdatasets0_sel = cf.filter_collocated_instruments(main_sensor0, datasets0)
    fdatasets1_sel = cf.filter_collocated_instruments(main_sensor1, datasets1)

    # deployments are driven by the first reference designator only
    deployments = [dd.split('/')[-1].split('_')[0] for dd in fdatasets0_sel]

    # lower bound (exclusive) of plausible values per variable; anything else must be > 0
    min_valid = {
        'salinity': 1,
        'density': 1000,
        'conductivity': 0.1,
        'dissolved_oxygen': 160,
        'estimated_oxygen_concentration': 200,
    }

    for d in deployments:
        fd0 = [x for x in fdatasets0_sel if d in x]
        fd1 = [x for x in fdatasets1_sel if d in x]

        # the second designator may not have this deployment; fd1[0] would raise IndexError
        if not fd1:
            print('No dataset for {} in the second reference designator - skipping.'.format(d))
            continue

        ds0 = xr.open_dataset(fd0[0], mask_and_scale=False)
        ds0 = ds0.swap_dims({'obs': 'time'})
        ds1 = xr.open_dataset(fd1[0], mask_and_scale=False)
        ds1 = ds1.swap_dims({'obs': 'time'})

        if stime is not None and etime is not None:
            ds0 = ds0.sel(time=slice(stime, etime))
            ds1 = ds1.sel(time=slice(stime, etime))
            if len(ds0['time'].values) == 0:
                # report the names that exist in this function (stime/etime)
                print('No data to plot for specified time range: ({} to {})'.format(stime, etime))
                continue

        fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd0[0])
        sci_vars = cf.return_science_vars(stream)

        save_dir_profile = os.path.join(sDir, array, subsite, inst, 'profile_plots', deployment)
        cf.create_dir(save_dir_profile)

        # get pressure variable (name and labels are taken from the first dataset)
        pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds0)

        for sv in sci_vars:
            print('')
            print(sv)
            if 'pressure' in sv:
                continue

            fig, ax = plt.subplots()
            plt.margins(y=.08, x=.02)
            plt.grid()
            title = ' '.join((deployment, subsite, inst, method))
            sname = '-'.join((subsite, inst, method, sv))

            for ds in (ds0, ds1):
                t = ds['time'].values
                zpressure = ds[pvarname].values
                z1 = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z1)) == len(z1):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # get rid of 0.0 (and implausibly low) data
                ind = z1 > min_valid.get(sv, 0)

                dtime = t[ind]
                zpressure = zpressure[ind]
                zdata = z1[ind]

                if len(dtime) > 0:
                    ax.scatter(zdata, zpressure, s=2, edgecolor='None')

            xlabel = sv + " (" + sv_units + ")"
            ylabel = press[0] + " (" + y_units[0] + ")"

            ax.invert_yaxis()
            ax.set_xlabel(xlabel, fontsize=9)
            ax.set_ylabel(ylabel, fontsize=9)
            ax.set_title(title + '\nWFP02 (blue) & WFP03 (orange)', fontsize=9)
            fig.tight_layout()
            pf.save_fig(save_dir_profile, sname)
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         zdbar, n_std, inpercentile, zcell_size):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
            #     eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
            #     eng_url = [s for s in url_list if eng in s]
            #     if len(eng_url) == 1:
            #         eng_datasets = cf.get_nc_urls(eng_url)
            #         # filter out collocated datasets
            #         eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
            #         if len(eng_dataset) > 0:
            #             ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
            #             t_eng = ds_eng['time'].values
            #             m_water_depth = ds_eng['m_water_depth'].values
            #
            #             # m_altimeter_status = 0 means a good reading (not nan or -1)
            #             try:
            #                 eng_ind = ds_eng['m_altimeter_status'].values == 0
            #             except KeyError:
            #                 eng_ind = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #
            #             m_water_depth = m_water_depth[eng_ind]
            #             t_eng = t_eng[eng_ind]
            #
            #             # get rid of any remaining nans or fill values
            #             eng_ind2 = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind2]
            #             t_eng = t_eng[eng_ind2]
            #         else:
            #             print('No engineering file for deployment {}'.format(deployment))
            #             m_water_depth = None
            #             t_eng = None
            #     else:
            #         m_water_depth = None
            #         t_eng = None
            # else:
            #     m_water_depth = None
            #     t_eng = None

            if deployment_num is not None:
                if int(int(deployment[-4:])) is not deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print(
                        'No data to plot for specified time range: ({} to {})'.
                        format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime  # .join((ds0_method, ds1_method
                save_dir_profile = os.path.join(sDir, array, subsite, refdes,
                                                'profile_plots', deployment,
                                                ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes,
                                                 'xsection_plots', deployment,
                                                 ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes,
                                           'xsection_plots_4d', deployment,
                                           ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes,
                                                'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes,
                                                 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes,
                                           'xsection_plots_4d', deployment)

            texclude_dir = os.path.join(sDir, array, subsite, refdes,
                                        'time_to_exclude')
            cf.create_dir(texclude_dir)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(
                ds)

            # prepare file to list timestamps with suspect data  for each data parameter
            stat_data = pd.DataFrame(
                columns=['deployments', 'time_to_exclude'])
            file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(
                texclude_dir, deployment, refdes, method)
            stat_data.to_csv(file_exclude, index=True)

            # loop through sensor-data parameters
            for sv in sci_vars:
                print(sv)
                if 'pressure' not in sv:
                    z1 = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    # Check if the array is all NaNs
                    if sum(np.isnan(z1)) == len(z1):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    elif len(z1[z1 != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    else:
                        # remove unreasonable pressure data (e.g. for surface piercing profilers)
                        if zdbar:
                            po_ind = (0 < y1) & (y1 < zdbar)
                            n_zdbar = np.sum(~po_ind)
                            tm = time1[po_ind]
                            y = y1[po_ind]
                            z = z1[po_ind]
                            ds_lat = ds_lat1[po_ind]
                            ds_lon = ds_lon1[po_ind]
                            print('{} in water depth > {} dbar'.format(
                                n_zdbar, zdbar))
                        else:
                            tm = time1
                            y = y1
                            z = z1
                            ds_lat = ds_lat1
                            ds_lon = ds_lon1

                        # reject erroneous data
                        dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                            cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                        # get rid of 0.0 data
                        if sv == 'salinity':
                            ind = ndata > 30
                        elif sv == 'density':
                            ind = ndata > 1022.5
                        elif sv == 'conductivity':
                            ind = ndata > 3.45
                        else:
                            ind = ndata > 0
                        # if sv == 'sci_flbbcd_chlor_units':
                        #     ind = ndata < 7.5
                        # elif sv == 'sci_flbbcd_cdom_units':
                        #     ind = ndata < 25
                        # else:
                        #     ind = ndata > 0.0

                        # if 'CTD' in r:
                        #     ind = zpressure > 0.0
                        # else:
                        #     ind = ndata > 0.0

                        lenzero = np.sum(~ind)
                        dtime = dtime[ind]
                        zpressure = zpressure[ind]
                        ndata = ndata[ind]
                        if ds_lat is not None and ds_lon is not None:
                            lat = lat[ind]
                            lon = lon[ind]
                        else:
                            lat = None
                            lon = None

                        if len(dtime) > 0:
                            # reject time range from data portal file export
                            t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                                cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                            print(
                                'removed {} data points using visual inspection of data'
                                .format(len(ndata) - len(z_portal)))

                            # create data groups
                            if len(y_portal) > 0:
                                columns = ['tsec', 'dbar', str(sv)]
                                min_r = int(round(min(y_portal) - zcell_size))
                                max_r = int(round(max(y_portal) + zcell_size))
                                ranges = list(range(min_r, max_r, zcell_size))

                                groups, d_groups = gt.group_by_depth_range(
                                    t_portal, y_portal, z_portal, columns,
                                    ranges)

                                if 'scatter' in sv:
                                    n_std = None  # to use percentile
                                else:
                                    n_std = n_std

                                #  identifying timestamps from percentile analysis
                                y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                                    groups, d_groups, n_std, inpercentile)
                                """
                                writing timestamps to .csv file to use with data_range.py script
                                """
                                if len(time_ex) != 0:
                                    t_exclude = time_ex[0]
                                    for i in range(
                                            len(time_ex))[1:len(time_ex)]:
                                        t_exclude = '{}, {}'.format(
                                            t_exclude, time_ex[i])

                                    stat_data = pd.DataFrame(
                                        {
                                            'deployments': deployment,
                                            'time_to_exclude': t_exclude
                                        },
                                        index=[sv])
                                    stat_data.to_csv(file_exclude,
                                                     index=True,
                                                     mode='a',
                                                     header=False)

                                #  rejecting timestamps from percentile analysis
                                if len(time_ex) > 0:
                                    t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(
                                        t_portal, y_portal, z_portal, time_ex)
                                else:
                                    t_nospct = t_portal
                                    z_nospct = z_portal
                                    y_nospct = y_portal
                                """
                                Plot data
                                """
                                if len(t_nospct) > 0:
                                    if len(t_nospct) != len(dtime):
                                        cf.create_dir(save_dir_profile)
                                        cf.create_dir(save_dir_xsection)
                                        sname = '-'.join((r, method, sv))
                                        sfile = '_'.join(
                                            ('rm_suspect_data', sname,
                                             pd.to_datetime(
                                                 t_nospct.min()).strftime(
                                                     '%Y%m%d')))

                                        t0 = pd.to_datetime(
                                            t_nospct.min()).strftime(
                                                '%Y-%m-%dT%H:%M:%S')
                                        t1 = pd.to_datetime(
                                            t_nospct.max()).strftime(
                                                '%Y-%m-%dT%H:%M:%S')
                                        title = ' '.join(
                                            (deployment, refdes,
                                             method)) + '\n' + t0 + ' to ' + t1

                                        if zdbar:
                                            leg_text = (
                                                'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges '
                                                '[{} - {}], {} unreasonable values'
                                                .format(
                                                    lenfv, lennan, lenev,
                                                    lengr, global_min,
                                                    global_max, lenzero) +
                                                '\nremoved {} in the upper and lower {} percentile of data grouped in {} '
                                                'dbar segments'.format(
                                                    len(z_portal) -
                                                    len(z_nospct),
                                                    inpercentile, zcell_size) +
                                                '\nexcluded {} suspect data points when inspected visually'
                                                .format(
                                                    len(ndata) - len(z_portal))
                                                +
                                                '\nexcluded {} suspect data in water depth greater than {} dbar'
                                                .format(n_zdbar, zdbar), )

                                        elif n_std:
                                            leg_text = (
                                                'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                                '{} unreasonable values'.
                                                format(lenfv, lennan, lenev,
                                                       lengr, global_min,
                                                       global_max, lenzero) +
                                                '\nremoved {} data points +/- {} SD of data grouped in {} dbar segments'
                                                .format(
                                                    len(z_portal) -
                                                    len(z_nospct), n_std,
                                                    zcell_size) +
                                                '\nexcluded {} suspect data points when inspected visually'
                                                .format(
                                                    len(ndata) -
                                                    len(z_portal)), )
                                        else:
                                            leg_text = (
                                                'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                                '{} unreasonable values'.
                                                format(lenfv, lennan, lenev,
                                                       lengr, global_min,
                                                       global_max, lenzero) +
                                                '\nremoved {} in the upper and lower {} percentile of data grouped in {} dbar segments'
                                                .format(
                                                    len(z_portal) -
                                                    len(z_nospct),
                                                    inpercentile, zcell_size) +
                                                '\nexcluded {} suspect data points when inspected visually'
                                                .format(
                                                    len(ndata) -
                                                    len(z_portal)), )
                                        '''
                                        profile plot
                                        '''
                                        xlabel = sv + " (" + sv_units + ")"
                                        ylabel = press[0] + " (" + y_units[
                                            0] + ")"
                                        clabel = 'Time'

                                        # plot non-erroneous data
                                        print('plotting profile')
                                        fig, ax = pf.plot_profiles(z_nospct,
                                                                   y_nospct,
                                                                   t_nospct,
                                                                   ylabel,
                                                                   xlabel,
                                                                   clabel,
                                                                   stdev=None)

                                        ax.set_title(title, fontsize=9)
                                        ax.plot(n_avg, y_avg, '-k')
                                        #ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                                        ax.legend(leg_text,
                                                  loc='upper center',
                                                  bbox_to_anchor=(0.5, -0.17),
                                                  fontsize=6)
                                        fig.tight_layout()
                                        pf.save_fig(save_dir_profile, sfile)
                                        '''
                                        xsection plot
                                        '''
                                        print('plotting xsection')
                                        clabel = sv + " (" + sv_units + ")"
                                        ylabel = press[0] + " (" + y_units[
                                            0] + ")"

                                        # plot bathymetry only within data time ranges
                                        # if t_eng is not None:
                                        #     eng_ind = (t_eng >= np.nanmin(t_array)) & (t_eng <= np.nanmax(t_array))
                                        #     t_eng = t_eng[eng_ind]
                                        #     m_water_depth = m_water_depth[eng_ind]

                                        # plot non-erroneous data
                                        fig, ax, bar = pf.plot_xsection(
                                            subsite,
                                            t_nospct,
                                            y_nospct,
                                            z_nospct,
                                            clabel,
                                            ylabel,
                                            t_eng=None,
                                            m_water_depth=None,
                                            inpercentile=inpercentile,
                                            stdev=None)

                                        ax.set_title(title, fontsize=9)
                                        ax.legend(leg_text,
                                                  loc='upper center',
                                                  bbox_to_anchor=(0.5, -0.17),
                                                  fontsize=6)
                                        fig.tight_layout()
                                        pf.save_fig(save_dir_xsection, sfile)
def plot_velocity_variables(r, fdatasets, num_plots, save_dir):
    """Plot the upward (W) velocity of each dataset as profile and cross-section panels.

    Two figures are built, each with ``num_plots`` stacked panels and one panel
    per entry of ``fdatasets``:

    * a profile view: W velocity against pressure, coloured by time;
    * a cross-section view: pressure against time, coloured by W velocity.

    Both figures are written to ``save_dir``. Their file names are the base
    names ``w_plots_profile`` / ``w_plots_xsection`` followed by one
    ``_deployment<N>_<tail>`` tag per dataset, where ``<tail>`` is the last
    underscore-separated piece of the dataset file name without its extension.

    Only the W component is drawn. Plots of the other quantities (u, v, roll,
    pitch, current magnitude) are not produced by this function.

    Args:
        r: Instrument identifier forwarded unchanged to the data-rejection and
            axis-formatting helpers.
        fdatasets: Paths or URLs of the dataset files. Each file name (the part
            after the last ``/``) must start with ``deployment<number>_``.
        num_plots: Number of panels in each figure. Must be at least
            ``len(fdatasets)``; surplus panels are hidden.
        save_dir: Directory the two figures are saved into.

    Returns:
        None. The figures are saved to disk and then closed.
    """
    n_datasets = len(fdatasets)

    def new_figure():
        # squeeze=False always returns a 2-D array of axes, so the panels can be
        # indexed the same way whether num_plots is 1 or larger.
        fig, axes = pyplot.subplots(nrows=num_plots, ncols=1, sharey=True, squeeze=False)
        fig.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
        panels = axes[:, 0]
        # Hide the panels that will not receive a dataset.
        for spare in panels[n_datasets:]:
            spare.axis('off')
        return fig, panels

    fig_profile, ax_profile = new_figure()
    fig_xsection, ax_xsection = new_figure()

    # One '_deployment<N>_<tail>' tag per dataset, appended to both file names.
    file_tags = []

    try:
        for ii, fdataset in enumerate(fdatasets):
            fname = fdataset.split('/')[-1]
            print('\n', fname)
            deployment = int(fname.split('_')[0].split('deployment')[-1])

            # The context manager releases the file handle even if plotting fails.
            with xr.open_dataset(fdataset, mask_and_scale=False) as ds:
                time_values = ds['time'].values

                var_list = cf.notin_list(ds.data_vars.keys(), ['time', '_qc_'])

                # Other call sites in this file unpack five values from this
                # helper (variable name first). Taking the trailing four keeps
                # this working whether or not the name is included.
                pressure_info = cf.add_pressure_to_dictionary_of_sci_vars(ds)
                z_data, z_unit, z_name, z_fill = pressure_info[-4:]
                z_data, _ = reject_err_data_1_dims(z_data, z_fill[0], r, z_name[0], n=5)

                w_id, w_data, w_unit, _, w_fill = get_variable_data(ds, var_list, 'upward_velocity')
                w_data, err_count_w = reject_err_data_1_dims(w_data, w_fill, r, w_id, n=5)

                # Profile panel: W velocity against pressure, coloured by time.
                css = ax_profile[ii].scatter(w_data, z_data, c=time_values, s=2,
                                             edgecolor='None', cmap='rainbow')
                prepare_axis_profile(css, fig_profile, ax_profile[ii], time_values,
                                     [z_name[0], z_unit[0]],
                                     ['[W] Velocity Components', w_unit],
                                     r, deployment, ii, n_datasets, err_count=err_count_w)

                # Cross-section panel: pressure against time, coloured by W velocity.
                css = ax_xsection[ii].scatter(time_values, z_data, c=w_data, cmap='RdBu',
                                              s=2, edgecolor='None')
                prepare_axis_xsection(css, fig_xsection, ax_xsection[ii], time_values,
                                      [z_name[0], z_unit[0]],
                                      ['[W] Velocity Components', w_unit],
                                      r, deployment, ii, n_datasets, err_count=err_count_w)

            file_tags.append('_deployment' + str(deployment) + '_' +
                             fname.split('_')[-1].split('.')[0])

        suffix = ''.join(file_tags)

        save_file = os.path.join(save_dir, 'w_plots_profile' + suffix)
        fig_profile.savefig(str(save_file), dpi=150, bbox_inches='tight')

        save_file = os.path.join(save_dir, 'w_plots_xsection' + suffix)
        fig_xsection.savefig(str(save_file), dpi=150, bbox_inches='tight')
    finally:
        # Always release the figures so a failure part-way through does not leak them.
        pyplot.close(fig_profile)
        pyplot.close(fig_xsection)
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, method_num, zdbar, n_std, inpercentile, zcell_size):
    """
    Plot profiles of the science variables found in each dataset of url_list
    after successively removing erroneous and suspect data, and record the
    timestamps rejected by the percentile analysis in a .csv file.

    For every science variable up to four figures are saved:
      rm_erroneous_data_*  data returned by cf.reject_erroneous_data
      rm_suspect_data_*    data left after cf.reject_timestamps_in_groups
      rm_v_suspect_data_*  data left after cf.reject_timestamps_dataportal
      rm_depth_range_*     data left after excluding pressures >= zdbar

    :param url_list: URLs of the data requests; the reference designator and
        the method-stream are parsed from the second to last path element
    :param sDir: root directory under which figures and the
        excluded-timestamps .csv files are saved
    :param plot_type: name of the sub-directory used in the figure path
    :param deployment_num: if not None, only datasets from this deployment
        number are processed
    :param start_time: if start_time and end_time are both not None the
        dataset is sliced to this time range (must support strftime)
    :param end_time: see start_time
    :param method_num: if not None, only datasets whose method (as returned by
        cf.nc_attributes) equals this value are processed
    :param zdbar: if not None, only data with pressure < zdbar are kept for
        the depth-range figure
    :param n_std: passed to cf.reject_timestamps_in_groups; None is passed
        instead for variables with 'scatter' in their name
    :param inpercentile: passed to cf.reject_timestamps_in_groups and quoted
        in the figure legends
    :param zcell_size: size (dbar) of the pressure bins used to group the data;
        used as a range() step, so it must be an integer
    """
    for i, u in enumerate(url_list):
        print('\nUrl {} of {}: {}'.format(i + 1, len(url_list), u))
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = elements[1]
        array = subsite[0:2]
        main_sensor = elements[4]

        # read URL to get data
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        # get sci data review list
        dr_data = cf.refdes_datareview_json(r)

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for _, row in ps_df.iterrows():
            deploy_num = int(row['deployment'][-4:])
            deploy_info = cf.get_deployment_information(dr_data, deploy_num)
            deployments.append(deploy_num)
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # create a dictionary for science variables from analysis file
        stream_sci_vars_dict = dict()
        for x in dr_data['instrument']['data_streams']:
            dr_ms = '-'.join((x['method'], x['stream_name']))
            if ms != dr_ms:
                continue
            sci_vars = {param['name']: dict(db_units=param['unit'])
                        for param in x['stream']['parameters']
                        if param['data_product_type'] == 'Science Data'}
            stream_sci_vars_dict[dr_ms] = dict(vars=sci_vars)

        for ii, d in enumerate(datasets_sel):
            part_d = d.split('/')[-1]
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel), part_d))
            # NOTE(review): ds is still read after the with-block has closed the
            # file handle - presumably xarray re-opens it on demand; confirm.
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)

            if method_num is not None and method != method_num:
                print(method_num, method)
                continue

            if deployment_num is not None:
                # Compare by value (not identity) and read the number from the
                # last four characters, the same convention used for ps_df
                # above, so that numbers ending in 0 (e.g. 0010) are parsed.
                dataset_deployment = int(deployment[-4:])
                if dataset_deployment != deployment_num:
                    print(type(dataset_deployment), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0], deployment, ext)
            else:
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0], deployment)

            cf.create_dir(save_dir)

            texclude_dir = os.path.join(sDir, array, subsite, refdes, 'time_to_exclude')
            cf.create_dir(texclude_dir)

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

            # fill the arrays of every variable whose units in the file match
            # the units listed in the data review database
            for var, sh in sci_vars_dict[ms]['vars'].items():
                if ds[var].units != sh['db_units']:
                    continue
                if ds[var]._FillValue not in sh['fv']:
                    sh['fv'].append(ds[var]._FillValue)
                if ds[var].units not in sh['units']:
                    sh['units'].append(ds[var].units)

                sh['t'] = np.append(sh['t'], ds['time'].values)
                sh['values'] = np.append(sh['values'], ds[var].values)

                # NOTE(review): a 3-tuple is expected here - confirm against the
                # version of cf.add_pressure_to_dictionary_of_sci_vars in use.
                press, y_unit, y_name = cf.add_pressure_to_dictionary_of_sci_vars(ds)
                sh['pressure'] = np.append(sh['pressure'], press)

            # start the excluded-timestamps file with a header-only table;
            # rows are appended below, one per variable
            stat_data = pd.DataFrame(columns=['deployments', 'time_to_exclude'])
            file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(texclude_dir,
                                                                        deployment, refdes, method)
            stat_data.to_csv(file_exclude, index=True)

            for m, stream_info in sci_vars_dict.items():
                for sv, vinfo in stream_info['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        # nothing was collected for this variable: skip it
                        # rather than fall through with undefined/stale arrays
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t = vinfo['t']
                    z = vinfo['values']
                    y = vinfo['pressure']

                    # Check if the array is all NaNs
                    if np.all(np.isnan(z)):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                        cf.reject_erroneous_data(r, sv, t, y, z, fv)

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(min(zpressure) - zcell_size))
                    max_r = int(round(max(zpressure) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges)

                    # 'scatter' variables use the percentile only; a local name
                    # keeps the caller's n_std intact for the other variables
                    group_n_std = None if 'scatter' in sv else n_std

                    # rejecting timestamps from percentile analysis
                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex, \
                        t_nospct, z_nospct, y_nospct = cf.reject_timestamps_in_groups(groups, d_groups, group_n_std,
                                                                                      dtime, zpressure, ndata,
                                                                                      inpercentile)
                    n_suspect = len(zpressure) - len(z_nospct)
                    print('{} using {} percentile of data grouped in {} dbar segments'.format(
                        n_suspect, inpercentile, zcell_size))

                    # writing timestamps to .csv file to use with data_range.py script
                    if len(time_ex) != 0:
                        t_exclude = time_ex[0]
                        for ts in time_ex[1:]:
                            t_exclude = '{}, {}'.format(t_exclude, ts)

                        stat_data = pd.DataFrame({'deployments': deployment,
                                                  'time_to_exclude': t_exclude}, index=[sv])
                        stat_data.to_csv(file_exclude, index=True, mode='a', header=False)

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r,
                                                                                   t_nospct, z_nospct, y_nospct)
                    n_visual = len(z_nospct) - len(z_portal)
                    print('{} using visual inspection of data'.format(n_visual))

                    # reject data in a depth range
                    if zdbar is not None:
                        y_ind = y_portal < zdbar
                        t_array = t_portal[y_ind]
                        y_array = y_portal[y_ind]
                        z_array = z_portal[y_ind]
                    else:
                        t_array = t_portal
                        y_array = y_portal
                        z_array = z_portal
                    # number of points dropped by the depth criterion (the
                    # length of the mask itself would be the total point count)
                    n_deep = len(z_portal) - len(z_array)
                    print('{} in water depth > {} dbar'.format(n_deep, zdbar))

                    # Plot data
                    if len(t_array) == 0:
                        print('No data left after rejecting suspect data - skipping plot.')
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((sv, r))
                    else:
                        sname = '-'.join((sv, r, m))

                    xlabel = sv + " (" + sv_units + ")"
                    ylabel = y_name[0] + " (" + y_unit[0] + ")"
                    clabel = 'Time'
                    title = ' '.join((deployment, r, m))

                    # plot non-erroneous data
                    fig, ax = pf.plot_profiles(ndata, zpressure, dtime,
                                               ylabel, xlabel, clabel, end_times, deployments, stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                    leg_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}]'.format(
                            len(z) - lenfv, len(z) - lennan, len(z) - lenev, lengr, global_min, global_max) + '\n' +
                        ('(black) data average in {} dbar segments'.format(zcell_size)) + '\n' +
                        ('(magenta) upper and lower {} percentile envelope in {} dbar segments'.format(
                            inpercentile, zcell_size)),)
                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    sfile = '_'.join(('rm_erroneous_data', sname))
                    pf.save_fig(save_dir, sfile)

                    # plot excluding time ranges for suspect data
                    if n_suspect != 0:
                        fig, ax = pf.plot_profiles(z_nospct, y_nospct, t_nospct,
                                                   ylabel, xlabel, clabel, end_times, deployments, stdev=None)
                        ax.set_title(title, fontsize=9)
                        leg_text = (
                            'removed {} in the upper and lower {} percentile of data grouped in {} dbar segments'.format(
                                n_suspect, inpercentile, zcell_size),)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_suspect_data', sname))
                        pf.save_fig(save_dir, sfile)

                    # plot excluding time ranges from data portal export
                    if n_visual != 0:
                        fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal,
                                                   ylabel, xlabel, clabel, end_times, deployments, stdev=None)
                        ax.set_title(title, fontsize=9)
                        leg_text = ('excluded {} suspect data when inspected visually'.format(n_visual),)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_v_suspect_data', sname))
                        pf.save_fig(save_dir, sfile)

                    # Plot excluding a selected depth value (only when the depth
                    # criterion actually removed something)
                    if n_deep != 0:
                        fig, ax = pf.plot_profiles(z_array, y_array, t_array,
                                                   ylabel, xlabel, clabel, end_times, deployments, stdev=None)
                        ax.set_title(title, fontsize=9)
                        leg_text = ('excluded {} suspect data in water depth greater than {} dbar'.format(n_deep, zdbar),)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_depth_range', sname))
                        pf.save_fig(save_dir, sfile)