Python plot_profiles Examples

Programming Language: Python

Namespace/Package Name: functions.plotting

Method/Function: plot_profiles

Examples at hotexamples.com: 9

Python plot_profiles - 9 examples found. These are the top-rated real-world Python examples of functions.plotting.plot_profiles, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         n_std, surface_params, depth_params):
    """Create profile, cross-section and (for gliders) 3-D plots per dataset.

    For every reference designator found in ``url_list`` (engineering
    designators containing ``'ENG'`` are ignored) the matching NetCDF files
    are opened, each science variable is cleaned with the ``cf`` helper
    functions and plotted twice: once with all data and once with the
    erroneous data removed. Figures are written below
    ``sDir/<array>/<subsite>/<refdes>/``.

    Parameters
    ----------
    url_list : list of str
        Catalog URLs. The second-to-last ``/``-separated component is split
        on ``-``; items 1-4 form the reference designator.
    sDir : str
        Root directory under which the plot directories are created.
    deployment_num : int or None
        When not None, only files whose deployment number equals this value
        are plotted.
    start_time, end_time : objects providing ``strftime``, or None
        When both are given, the dataset is restricted to this time slice
        and the plots are saved in an extra ``<start>to<end>`` subdirectory.
    preferred_only : str
        ``'yes'`` restricts the files to the preferred streams returned by
        ``cf.get_preferred_stream_info``; any other value keeps all files.
    n_std : object
        Accepted for interface compatibility only. The value passed in is
        never forwarded: the per-group list built from ``surface_params[3]``
        and ``depth_params[3]`` is passed to ``reject_timestamps_in_groups``
        in its place (this matches the behaviour of the original code).
    surface_params, depth_params : sequence
        Items 0-2 are used as ``range(start, stop, step)`` arguments for the
        depth bins (the depth range additionally includes its stop value);
        item 3 is the value repeated once per bin and reported in the plot
        legend as the percentile.

    Returns
    -------
    None
    """
    # Unique reference designators, in order of first appearance.
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))

        # Gather the NetCDF file URLs of every catalog URL belonging to r.
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.append(cf.get_nc_urls([u]))
        datasets = list(itertools.chain(*datasets))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # row[ii] is not a string (no stream in this slot)
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            # NOTE(review): this dataset is never closed explicitly; the
            # handle is rebound by swap_dims()/sel() below.
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # for coastal gliders, get m_water_depth for bathymetry
            t_eng, m_water_depth = _load_glider_water_depth(
                r, url_list, method, deployment)

            if deployment_num is not None:
                file_deployment = _deployment_number(deployment)
                # Compare by value: identity ("is not") on ints is only
                # accidentally correct for small cached integers.
                if file_deployment != deployment_num:
                    # debug output retained from the original implementation
                    print(type(file_deployment), type(deployment_num))
                    continue

            # Directory components below <plot type>/; a time-range
            # subdirectory is appended when a time slice was requested.
            subdirs = [deployment]
            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print(
                        'No data to plot for specified time range: ({} to {})'.
                        format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                subdirs.append(stime + 'to' + etime)

            plot_root = os.path.join(sDir, array, subsite, refdes)
            save_dir_profile = os.path.join(plot_root, 'profile_plots',
                                            *subdirs)
            save_dir_xsection = os.path.join(plot_root, 'xsection_plots',
                                             *subdirs)
            save_dir_4d = os.path.join(plot_root, 'xsection_plots_4d',
                                       *subdirs)

            tm = ds['time'].values
            try:
                ds_lat = ds['lat'].values
            except KeyError:
                ds_lat = None
                print('No latitude variable in file')
            try:
                ds_lon = ds['lon'].values
            except KeyError:
                ds_lon = None
                print('No longitude variable in file')

            # get pressure variable
            y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print(sv)
                if 'pressure' in sv:
                    continue

                z = ds[sv].values
                fv = ds[sv]._FillValue
                sv_units = ds[sv].units

                # Check if the array is all NaNs
                if sum(np.isnan(z)) == len(z):
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 data
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                lenzero = np.sum(~ind)
                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                # The title needs the min/max of the cleaned timestamps;
                # min()/max() of an empty array would raise, so skip instead.
                if len(dtime) == 0:
                    print('No data left after removing erroneous values - '
                          'skipping plot.')
                    continue

                t0 = pd.to_datetime(
                    dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(
                    dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes,
                                  method)) + '\n' + t0 + ' to ' + t1

                # reject time range from data portal file export
                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                n_removed_visual = len(ndata) - len(z_portal)
                print(
                    'removed {} data points using visual inspection of data'
                    .format(n_removed_visual))

                # create data groups: surface bins followed by depth bins
                # (the depth range includes its stop value)
                columns = ['tsec', 'dbar', str(sv)]
                range1 = list(
                    range(surface_params[0], surface_params[1],
                          surface_params[2]))
                range2 = list(
                    range(depth_params[0],
                          depth_params[1] + depth_params[2],
                          depth_params[2]))
                ranges = range1 + range2

                groups, d_groups = gt.group_by_depth_range(
                    t_portal, y_portal, z_portal, columns, ranges)

                # get percentile analysis for printing on the profile plot.
                # The original code overwrote n_std with this same per-group
                # list right before the call, so the n_std argument never
                # reached the callee; a local name is used here so the
                # parameter is no longer rebound inside the loop.
                inpercentile = [surface_params[3]] * len(
                    range1) + [depth_params[3]] * len(range2)
                group_n_std = list(inpercentile)
                # NOTE(review): called unqualified as in the original;
                # confirm it is imported at file level (a sibling script
                # calls cf.reject_timestamps_in_groups).
                y_plt, n_med, n_min, n_max, n0_std, n1_std, l_arr, time_ex = reject_timestamps_in_groups(
                    groups, d_groups, group_n_std, inpercentile)

                sname = '-'.join((r, method, sv))
                var_label = sv + " (" + sv_units + ")"
                press_label = press[0] + " (" + y_units[0] + ")"

                # ---- Plot all data ----
                if len(tm) > 0:
                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sfileall = '_'.join(('all_data', sname))

                    # profile plot
                    fig, ax = pf.plot_profiles(z,
                                               y,
                                               tm,
                                               press_label,
                                               var_label,
                                               'Time',
                                               stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfileall)

                    # xsection plot
                    fig, ax, bar = pf.plot_xsection(subsite,
                                                    tm,
                                                    y,
                                                    z,
                                                    var_label,
                                                    press_label,
                                                    t_eng,
                                                    m_water_depth,
                                                    inpercentile=None,
                                                    stdev=None)
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfileall)

                # ---- Plot cleaned-up data ----
                if len(dtime) > 0:
                    sfile = '_'.join(('rm_erroneous_data', sname))

                    # Legend text shared by the profile and xsection plots.
                    removed_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                        '{} zeros'.format(lenfv, lennan, lenev, lengr,
                                          global_min, global_max, lenzero) +
                        '\nexcluded {} suspect data points when inspected visually'
                        .format(n_removed_visual))

                    # profile plot
                    fig, ax = pf.plot_profiles(z_portal,
                                               y_portal,
                                               t_portal,
                                               press_label,
                                               var_label,
                                               'Time',
                                               stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.plot(n_med, y_plt, '.k')
                    ax.fill_betweenx(y_plt,
                                     n0_std,
                                     n1_std,
                                     color='m',
                                     alpha=0.2)
                    # one-element tuple: ax.legend takes a sequence of labels
                    leg_text = (
                        removed_text +
                        '\n(black) data median in {} dbar segments (break at {} dbar)'
                        .format([surface_params[2], depth_params[2]],
                                depth_params[0]) +
                        '\n(magenta) upper and lower {} percentile envelope in {} dbar segments'
                        .format([surface_params[3], depth_params[3]],
                                [surface_params[2], depth_params[2]]), )
                    ax.legend(leg_text,
                              loc='upper center',
                              bbox_to_anchor=(0.5, -0.17),
                              fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfile)

                    # xsection plot of the non-erroneous data
                    fig, ax, bar = pf.plot_xsection(subsite,
                                                    t_portal,
                                                    y_portal,
                                                    z_portal,
                                                    var_label,
                                                    press_label,
                                                    t_eng,
                                                    m_water_depth,
                                                    inpercentile=None,
                                                    stdev=None)
                    ax.set_title(title, fontsize=9)
                    leg_text = (removed_text, )
                    ax.legend(leg_text,
                              loc='upper center',
                              bbox_to_anchor=(0.5, -0.17),
                              fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfile)

                    # 4D plot for gliders only
                    if 'MOAS' in r and ds_lat is not None and ds_lon is not None:
                        cf.create_dir(save_dir_4d)

                        fig = plt.figure()
                        ax = fig.add_subplot(111, projection='3d')
                        sct = ax.scatter(lon_portal,
                                         lat_portal,
                                         y_portal,
                                         c=z_portal,
                                         s=2)
                        cbar = plt.colorbar(sct,
                                            label=var_label,
                                            extend='both')
                        cbar.ax.tick_params(labelsize=8)
                        ax.invert_zaxis()
                        ax.view_init(25, 32)
                        ax.invert_xaxis()
                        ax.invert_yaxis()
                        ax.set_zlabel(press_label, fontsize=9)
                        ax.set_ylabel('Latitude', fontsize=9)
                        ax.set_xlabel('Longitude', fontsize=9)

                        ax.set_title(title, fontsize=9)
                        pf.save_fig(save_dir_4d, sfile)


def _deployment_number(deployment):
    """Return the integer formed by the trailing digits of *deployment*.

    Raises ValueError when *deployment* does not end in a digit.
    """
    digits = deployment[len(deployment.rstrip('0123456789')):]
    return int(digits)


def _load_glider_water_depth(r, url_list, method, deployment):
    """Return ``(t_eng, m_water_depth)`` for coastal gliders, else ``(None, None)``.

    Only reference designators containing 'CE05MOAS' or 'CP05MOAS' are
    handled, and only when exactly one URL in ``url_list`` matches the
    glider engineering stream. Readings are kept where
    ``m_altimeter_status`` equals 0.
    """
    if 'CE05MOAS' not in r and 'CP05MOAS' not in r:
        return None, None

    eng = '-'.join((r.split('-')[0], r.split('-')[1],
                    '00-ENG000000', method, 'glider_eng'))
    eng_url = [s for s in url_list if eng in s]
    if len(eng_url) != 1:
        return None, None

    eng_datasets = cf.get_nc_urls(eng_url)
    # filter out collocated datasets
    eng_dataset = [
        j for j in eng_datasets
        if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])
    ]
    if len(eng_dataset) == 0:
        print('No engineering file for deployment {}'.format(deployment))
        return None, None

    # Read the arrays inside the context manager so the engineering file is
    # closed again instead of being left open for every processed dataset.
    with xr.open_dataset(eng_dataset[0], mask_and_scale=False) as ds_eng:
        t_eng = ds_eng['time'].values
        m_water_depth = ds_eng['m_water_depth'].values
        # m_altimeter_status = 0 means a good reading (not nan or -1)
        eng_ind = ds_eng['m_altimeter_status'].values == 0

    return t_eng[eng_ind], m_water_depth[eng_ind]

Example #2

Show file

def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
            #     eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
            #     eng_url = [s for s in url_list if eng in s]
            #     if len(eng_url) == 1:
            #         eng_datasets = cf.get_nc_urls(eng_url)
            #         # filter out collocated datasets
            #         eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
            #         if len(eng_dataset) > 0:
            #             ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
            #             t_eng = ds_eng['time'].values
            #             m_water_depth = ds_eng['m_water_depth'].values
            #
            #             # m_altitude = glider height above seafloor
            #             # m_depth = glider depth in the water column
            #             # m_altitude = ds_eng['m_altitude'].values
            #             # m_depth = ds_eng['m_depth'].values
            #             # calc_water_depth = m_altitude + m_depth
            #
            #             # m_altimeter_status = 0 means a good reading (not nan or -1)
            #             try:
            #                 eng_ind = ds_eng['m_altimeter_status'].values == 0
            #             except KeyError:
            #                 eng_ind = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #
            #             m_water_depth = m_water_depth[eng_ind]
            #             t_eng = t_eng[eng_ind]
            #
            #             # get rid of any remaining nans or fill values
            #             eng_ind2 = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind2]
            #             t_eng = t_eng[eng_ind2]
            #         else:
            #             print('No engineering file for deployment {}'.format(deployment))
            #             m_water_depth = None
            #             t_eng = None
            #     else:
            #         m_water_depth = None
            #         t_eng = None
            # else:
            #     m_water_depth = None
            #     t_eng = None

            if deployment_num is not None:
                if int(int(deployment[-4:])) is not deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime  # .join((ds0_method, ds1_method
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' not in sv:
                    if sv == 'spkir_abj_cspp_downwelling_vector':
                        pxso.pf_xs_spkir(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile, save_dir_profile,
                                         save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    elif 'OPTAA' in r:
                        if sv not in ['wavelength_a', 'wavelength_c']:
                            pxso.pf_xs_optaa(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile, save_dir_profile,
                                             save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    else:
                        z1 = ds[sv].values
                        fv = ds[sv]._FillValue
                        sv_units = ds[sv].units

                        # Check if the array is all NaNs
                        if sum(np.isnan(z1)) == len(z1):
                            print('Array of all NaNs - skipping plot.')
                            continue

                        # Check if the array is all fill values
                        elif len(z1[z1 != fv]) == 0:
                            print('Array of all fill values - skipping plot.')
                            continue

                        else:
                            # remove unreasonable pressure data (e.g. for surface piercing profilers)
                            if zdbar:
                                po_ind = (0 < y1) & (y1 < zdbar)
                                tm = time1[po_ind]
                                y = y1[po_ind]
                                z = z1[po_ind]
                                ds_lat = ds_lat1[po_ind]
                                ds_lon = ds_lon1[po_ind]
                            else:
                                tm = time1
                                y = y1
                                z = z1
                                ds_lat = ds_lat1
                                ds_lon = ds_lon1

                            # reject erroneous data
                            dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                                cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                            # get rid of 0.0 data
                            if sv == 'salinity':
                                ind = ndata > 30
                            elif sv == 'density':
                                ind = ndata > 1022.5
                            elif sv == 'conductivity':
                                ind = ndata > 3.45
                            else:
                                ind = ndata > 0
                            # if sv == 'sci_flbbcd_chlor_units':
                            #     ind = ndata < 7.5
                            # elif sv == 'sci_flbbcd_cdom_units':
                            #     ind = ndata < 25
                            # else:
                            #     ind = ndata > 0.0

                            # if 'CTD' in r:
                            #     ind = zpressure > 0.0
                            # else:
                            #     ind = ndata > 0.0

                            lenzero = np.sum(~ind)
                            dtime = dtime[ind]
                            zpressure = zpressure[ind]
                            ndata = ndata[ind]
                            if ds_lat is not None and ds_lon is not None:
                                lat = lat[ind]
                                lon = lon[ind]
                            else:
                                lat = None
                                lon = None

                            if len(dtime) > 0:
                                # reject time range from data portal file export
                                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                                print('removed {} data points using visual inspection of data'.format(
                                    len(ndata) - len(z_portal)))

                                # create data groups
                                if len(y_portal) > 0:
                                    columns = ['tsec', 'dbar', str(sv)]
                                    min_r = int(round(np.nanmin(y_portal) - zcell_size))
                                    max_r = int(round(np.nanmax(y_portal) + zcell_size))
                                    ranges = list(range(min_r, max_r, zcell_size))

                                    groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                                    if 'scatter' in sv:
                                        n_std = None  # to use percentile
                                    else:
                                        n_std = n_std

                                    #  get percentile analysis for printing on the profile plot
                                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                                        groups, d_groups, n_std, inpercentile)

                            """
                            Plot all data
                            """
                            if len(time1) > 0:
                                cf.create_dir(save_dir_profile)
                                cf.create_dir(save_dir_xsection)
                                sname = '-'.join((r, method, sv))
                                sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d')))
                                tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                title = ' '.join((deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                                if 'SPKIR' in r:
                                    title = title + '\nWavelength = 510 nm'

                                '''
                                profile plot
                                '''
                                xlabel = sv + " (" + sv_units + ")"
                                ylabel = press[0] + " (" + y_units[0] + ")"
                                clabel = 'Time'

                                fig, ax = pf.plot_profiles(z1, y1, time1, ylabel, xlabel, clabel, stdev=None)

                                ax.set_title(title, fontsize=9)
                                fig.tight_layout()
                                pf.save_fig(save_dir_profile, sfileall)

                                '''
                                xsection plot
                                '''
                                clabel = sv + " (" + sv_units + ")"
                                ylabel = press[0] + " (" + y_units[0] + ")"

                                fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, clabel, ylabel, t_eng=None,
                                                                m_water_depth=None, inpercentile=None, stdev=None)

                                if fig:
                                    ax.set_title(title, fontsize=9)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_xsection, sfileall)

                            """
                            Plot cleaned-up data
                            """
                            if len(dtime) > 0:
                                if len(y_portal) > 0:
                                    sfile = '_'.join(('rm_erroneous_data', sname, pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
                                    t0 = pd.to_datetime(t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                    t1 = pd.to_datetime(t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                    title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
                                    if 'SPKIR' in r:
                                        title = title + '\nWavelength = 510 nm'

                                    '''
                                    profile plot
                                    '''
                                    xlabel = sv + " (" + sv_units + ")"
                                    ylabel = press[0] + " (" + y_units[0] + ")"
                                    clabel = 'Time'

                                    fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None)

                                    ax.set_title(title, fontsize=9)
                                    ax.plot(n_avg, y_avg, '-k')
                                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                                    if inpercentile:
                                        leg_text = (
                                            'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                            '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                            '\nexcluded {} suspect data points when inspected visually'.format(
                                                len(ndata) - len(z_portal)) +
                                            '\n(black) data average in {} dbar segments'.format(zcell_size) +
                                            '\n(magenta) {} percentile envelope in {} dbar segments'.format(
                                                int(100 - inpercentile * 2), zcell_size),)
                                    elif n_std:
                                        leg_text = (
                                            'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                            '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max,
                                                              lenzero) +
                                            '\nexcluded {} suspect data points when inspected visually'.format(
                                                len(ndata) - len(z_portal)) +
                                            '\n(black) data average in {} dbar segments'.format(zcell_size) +
                                            '\n(magenta) +/- {} SD envelope in {} dbar segments'.format(
                                                int(n_std), zcell_size),)
                                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_profile, sfile)

                                    '''
                                    xsection plot
                                    '''
                                    clabel = sv + " (" + sv_units + ")"
                                    ylabel = press[0] + " (" + y_units[0] + ")"

                                    # plot non-erroneous data
                                    fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel,
                                                                    t_eng=None, m_water_depth=None, inpercentile=None,
                                                                    stdev=None)

                                    ax.set_title(title, fontsize=9)
                                    leg_text = (
                                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                        '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                        '\nexcluded {} suspect data points when inspected visually'.format(
                                            len(ndata) - len(z_portal)),
                                    )
                                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_xsection, sfile)

                                    '''
                                    4D plot for gliders only
                                    '''
                                    if 'MOAS' in r:
                                        if ds_lat is not None and ds_lon is not None:
                                            cf.create_dir(save_dir_4d)

                                            clabel = sv + " (" + sv_units + ")"
                                            zlabel = press[0] + " (" + y_units[0] + ")"

                                            fig = plt.figure()
                                            ax = fig.add_subplot(111, projection='3d')
                                            sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                                            cbar = plt.colorbar(sct, label=clabel, extend='both')
                                            cbar.ax.tick_params(labelsize=8)
                                            ax.invert_zaxis()
                                            ax.view_init(25, 32)
                                            ax.invert_xaxis()
                                            ax.invert_yaxis()
                                            ax.set_zlabel(zlabel, fontsize=9)
                                            ax.set_ylabel('Latitude', fontsize=9)
                                            ax.set_xlabel('Longitude', fontsize=9)

                                            ax.set_title(title, fontsize=9)
                                            pf.save_fig(save_dir_4d, sfile)

Example #3

Show file

def main(url_list, sDir, plot_type, start_time, end_time, deployment_num):
    """Plot and save one latitude/longitude track figure per entry in url_list.

    For every URL the reference designator is parsed from the second-to-last
    path segment, the matching NetCDF files are read, their lat/lon/time
    values are stacked, and the track is drawn with ``pf.plot_profiles`` and
    written with ``pf.save_fig``.

    Args:
        url_list: iterable of dataset URLs (strings).
        sDir: root output directory; figures are saved under
            ``<sDir>/<array>/<subsite>/<refdes>/<plot_type>``.
        plot_type: name of the innermost output sub-directory.
        start_time: start of the time slice, or None.
        end_time: end of the time slice, or None. The slice is applied only
            when both start_time and end_time are not None.
        deployment_num: when not None, only files whose deployment number
            equals this value are used.
    """
    for u in url_list:
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = elements[1]
        array = subsite[0:2]
        main_sensor = elements[4]
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        save_dir = os.path.join(sDir, array, subsite, r, plot_type)
        cf.create_dir(save_dir)
        sname = '-'.join((r, ms, 'track'))

        print('Appending....')
        frames = []
        for ii, d in enumerate(datasets_sel):
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel),
                                                  d.split('/')[-1]))
            # The context manager closes the file once its values are copied
            # into the DataFrame below.
            with xr.open_dataset(d, mask_and_scale=False) as raw:
                ds = raw.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(start_time, end_time))
                        continue

                _, _, _, _, _, deployment = cf.nc_attributes(d)

                # Compare by value, and parse the trailing four digits the
                # same way row['deployment'] is parsed further down
                # (assumes names like 'deployment0010' - TODO confirm).
                if (deployment_num is not None
                        and int(deployment[-4:]) != deployment_num):
                    print('skipping {} (requested deployment: {})'.format(
                        deployment, deployment_num))
                    continue

                frames.append(
                    pd.DataFrame({'lat': ds['lat'].values,
                                  'lon': ds['lon'].values},
                                 columns=['lat', 'lon'],
                                 index=ds['time'].values))

        if not frames:
            print('No track data found for {} - skipping plot.'.format(r))
            continue

        # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
        sh = pd.concat(frames)

        # Deployment numbers and end times depend only on r, so they are
        # looked up once here rather than once per dataset.
        ps_df, _ = cf.get_preferred_stream_info(r)
        dr_data = cf.refdes_datareview_json(r)
        deployments = []
        end_times = []
        for _, row in ps_df.iterrows():
            dnum = int(row['deployment'][-4:])
            deploy_info = cf.get_deployment_information(dr_data, dnum)
            if dnum not in deployments:
                deployments.append(dnum)
            stop_date = pd.to_datetime(deploy_info['stop_date'])
            if stop_date not in end_times:
                end_times.append(stop_date)

        xD = sh.lon.values
        yD = sh.lat.values
        tD = sh.index.values

        clabel = 'Time'
        ylabel = 'Latitude'
        xlabel = 'Longitude'

        fig, ax = pf.plot_profiles(xD,
                                   yD,
                                   tD,
                                   ylabel,
                                   xlabel,
                                   clabel,
                                   end_times,
                                   deployments,
                                   stdev=None)
        ax.invert_yaxis()
        ax.set_title('Glider Track - ' + r + '\n' + 'x: platform location',
                     fontsize=9)
        ax.set_xlim(-71.75, -69.75)
        ax.set_ylim(38.75, 40.75)

        # add Pioneer glider sampling area
        ax.add_patch(
            Rectangle((-71.5, 39.0),
                      1.58,
                      1.67,
                      linewidth=3,
                      edgecolor='b',
                      facecolor='none'))
        ax.text(-71,
                40.6,
                'Pioneer Glider Sampling Area',
                color='blue',
                fontsize=8)

        array_loc = cf.return_array_subsites_standard_loc(array)

        # mark the platform locations returned for this array
        ax.scatter(array_loc.lon,
                   array_loc.lat,
                   s=40,
                   marker='x',
                   color='k',
                   alpha=0.3)

        pf.save_fig(save_dir, sname)

Example #4

Show file

File: plot_profile_xsection_cabled.py Project: ooi-data-lab/data-review-tools

def _new_sci_vars_dict(sci_vars):
    """Return an empty accumulator {name: {'values', 'units', 'fv'}} per variable."""
    sci_vars_dict = {}
    for sci_var in sci_vars:
        if sci_var == 'time':
            values = np.array([], dtype=np.datetime64)
        else:
            values = np.array([])
        sci_vars_dict[sci_var] = dict(values=values, units=[], fv=[])
    return sci_vars_dict


def _append_dataset_values(ds, sci_vars_dict):
    """Append units, fill values and 1-D data from ds to sci_vars_dict in place."""
    for s_v, entry in sci_vars_dict.items():
        vv = ds[s_v]
        try:
            if vv.units not in entry['units']:
                entry['units'].append(vv.units)
        except AttributeError:
            print('No units attribute found for {}'.format(s_v))

        try:
            fill_value = vv._FillValue
        except AttributeError:
            print('No _FillValue attribute found for {}'.format(s_v))
        else:
            # NOTE(review): fill values are turned into NaNs only the first
            # time a given fill value is seen for a variable; files appended
            # later keep them. Left as-is because the fill value is also
            # passed to cf.reject_erroneous_data - confirm this is intended.
            if fill_value not in entry['fv']:
                entry['fv'].append(fill_value)
                vv_data = vv.values
                try:
                    vv_data[vv_data == fill_value] = np.nan
                except ValueError:
                    print('Could not replace fill values with NaN for {}'.format(s_v))

        if len(vv.dims) > 1:
            print('Skipping plot: variable has >1 dimension')
        else:
            entry['values'] = np.append(entry['values'], vv.values)


def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         n_std, inpercentile, zcell_size, zdbar):
    """Save profile and cross-section plots per reference designator, deployment and variable.

    The data of all files of a deployment are appended, erroneous values and
    portal-flagged timestamps are rejected through the ``cf`` helpers, and the
    remaining data are plotted with ``pf.plot_profiles`` / ``pf.plot_xsection``.

    Args:
        url_list: iterable of dataset URLs (strings). Reference designators
            containing 'ENG' or 'ADCP' are ignored.
        sDir: root output directory.
        deployment_num: when not None, only this deployment number is plotted.
        start_time: start of the time slice, or None.
        end_time: end of the time slice, or None. The slice is applied only
            when both start_time and end_time are not None; the range is then
            also added to the output path.
        preferred_only: 'yes' restricts the files to the preferred streams.
        n_std: accepted for interface compatibility; not used by the code
            shown here.
        inpercentile: accepted for interface compatibility; not used by the
            code shown here.
        zcell_size: accepted for interface compatibility; not used by the
            code shown here.
        zdbar: when truthy, only samples with 0 < pressure < zdbar are kept.
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        array = r[0:2]
        subsite = r.split('-')[0]

        datasets = []
        deployments = []
        for url in url_list:
            splitter = url.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check != r:
                continue
            catalog_rms = '-'.join((r, splitter[-2], splitter[-1]))
            for u in cf.get_nc_urls([url]):  # filter out collocated data files
                nc_name = u.split('/')[-1]
                if catalog_rms == nc_name.split('_20')[0][15:]:
                    datasets.append(u)
                    deployments.append(int(nc_name.split('_')[0][-4:]))
        deployments = np.unique(deployments).tolist()

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for dep in deployments:
            # Compare by value: identity (`is`) on ints is implementation-dependent.
            if deployment_num is not None and dep != deployment_num:
                print('\nskipping deployment {}'.format(dep))
                continue

            dep_name = 'deployment%04d' % dep
            rdatasets = sorted(s for s in fdatasets_sel if dep_name in s)
            if not rdatasets:
                continue

            # Output directories depend only on the deployment and the
            # requested time range, so build them once per deployment.
            if start_time is not None and end_time is not None:
                ext = start_time.strftime('%Y-%m-%d') + 'to' + end_time.strftime('%Y-%m-%d')
                save_dir_profile = os.path.join(
                    sDir, array, subsite, r, 'profile_plots', dep_name, ext)
                save_dir_xsection = os.path.join(
                    sDir, array, subsite, r, 'xsection_plots', dep_name, ext)
            else:
                save_dir_profile = os.path.join(
                    sDir, array, subsite, r, 'profile_plots', dep_name)
                save_dir_xsection = os.path.join(
                    sDir, array, subsite, r, 'xsection_plots', dep_name)

            sci_vars_dict = {}
            for i, rdataset in enumerate(rdatasets):
                # The context manager closes the file after its values have
                # been copied into sci_vars_dict.
                with xr.open_dataset(rdataset, mask_and_scale=False) as raw:
                    ds = raw.swap_dims({'obs': 'time'})
                    print('\nAppending data from {}: file {} of {}'.format(
                        dep_name, i + 1, len(rdatasets)))

                    if start_time is not None and end_time is not None:
                        ds = ds.sel(time=slice(start_time, end_time))
                        if len(ds['time'].values) == 0:
                            print(
                                'No data to plot for specified time range: ({} to {})'
                                .format(start_time, end_time))
                            continue

                    if len(sci_vars_dict) == 0:
                        _, _, refdes, method, stream, deployment = cf.nc_attributes(
                            rdatasets[0])
                        sci_vars = cf.return_science_vars(stream)
                        if 'CTDPF' not in r:
                            sci_vars.append('int_ctd_pressure')
                        sci_vars.append('time')
                        sci_vars_dict = _new_sci_vars_dict(
                            list(np.unique(sci_vars)))

                    # append data for the deployment into the dictionary
                    _append_dataset_values(ds, sci_vars_dict)

            # Every file may have been skipped by the time-range filter above.
            if 'time' not in sci_vars_dict or len(sci_vars_dict['time']['values']) == 0:
                print('No data found for {} - skipping plots.'.format(dep_name))
                continue

            # plot after appending all data into one file
            time1 = sci_vars_dict['time']['values']
            # Placeholder lat/lon arrays with arbitrary (uninitialised) contents;
            # presumably only their shape matters to the cf helpers - TODO confirm.
            ds_lat1 = np.empty(np.shape(time1))
            ds_lon1 = np.empty(np.shape(time1))

            # define pressure variable
            try:
                pname = 'seawater_pressure'
                press = sci_vars_dict[pname]
            except KeyError:
                pname = 'int_ctd_pressure'
                press = sci_vars_dict[pname]
            y1 = press['values']
            try:
                y_units = press['units'][0]
            except IndexError:
                y_units = ''

            for sv in sci_vars_dict.keys():
                print('')
                print(sv)
                if sv in ('seawater_pressure', 'int_ctd_pressure', 'time'):
                    continue

                z1 = sci_vars_dict[sv]['values']
                fv = sci_vars_dict[sv]['fv'][0]
                # Fall back to an empty unit string, as done for the pressure units.
                try:
                    sv_units = sci_vars_dict[sv]['units'][0]
                except IndexError:
                    sv_units = ''

                # Check if the array is all NaNs
                if np.isnan(z1).all():
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # remove unreasonable pressure data (e.g. for surface piercing profilers)
                if zdbar:
                    po_ind = (0 < y1) & (y1 < zdbar)
                    tm = time1[po_ind]
                    y = y1[po_ind]
                    z = z1[po_ind]
                    ds_lat = ds_lat1[po_ind]
                    ds_lon = ds_lon1[po_ind]
                else:
                    tm = time1
                    y = y1
                    z = z1
                    ds_lat = ds_lat1
                    ds_lon = ds_lon1

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # drop non-positive pressures for CTDs, non-positive values otherwise
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                if ds_lat is not None and ds_lon is not None:
                    lat = lat[ind]
                    lon = lon[ind]
                else:
                    lat = None
                    lon = None

                if len(dtime) == 0:
                    print('No data left after rejecting erroneous data - skipping plot.')
                    continue

                # reject time range from data portal file export
                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                print(
                    'removed {} data points using visual inspection of data'
                    .format(len(ndata) - len(z_portal)))

                # The plots below use the cleaned data, so there must be some left.
                if len(t_portal) == 0:
                    print('No data left after visual-inspection rejection - skipping plot.')
                    continue

                # Plot all (cleaned) data
                cf.create_dir(save_dir_profile)
                cf.create_dir(save_dir_xsection)
                sname = '-'.join((r, method, sv))
                t_first = pd.to_datetime(t_portal.min())
                t_last = pd.to_datetime(t_portal.max())
                sfileall = '_'.join((sname, t_first.strftime('%Y%m%d')))
                tm0 = t_first.strftime('%Y-%m-%dT%H:%M:%S')
                tm1 = t_last.strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join(
                    (deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                if 'SPKIR' in r:
                    title = title + '\nWavelength = 510 nm'

                # profile plot
                xlabel = sv + " (" + sv_units + ")"
                ylabel = pname + " (" + y_units + ")"
                clabel = 'Time'

                fig, ax = pf.plot_profiles(z_portal,
                                           y_portal,
                                           t_portal,
                                           ylabel,
                                           xlabel,
                                           clabel,
                                           stdev=None)

                ax.set_title(title, fontsize=9)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sfileall)

                # xsection plot
                clabel = sv + " (" + sv_units + ")"
                ylabel = pname + " (" + y_units + ")"

                fig, ax, bar = pf.plot_xsection(
                    subsite,
                    t_portal,
                    y_portal,
                    z_portal,
                    clabel,
                    ylabel,
                    t_eng=None,
                    m_water_depth=None,
                    inpercentile=None,
                    stdev=None)

                if fig:
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfileall)

                # NOTE: the listing this function comes from is cut off at this
                # point; the unterminated triple-quote that opened the next
                # section was dropped so that the function parses.

Example #5

Show file

File: plot_profile_xsection_rm_suspect_data.py Project: ooi-data-lab/data-review-tools

def _refdes_from_url(url):
    """Return the reference designator embedded in a dataset URL.

    The second-to-last path component is split on '-' and elements 1 to 4
    are re-joined with '-'.
    """
    parts = url.split('/')[-2].split('-')
    return '-'.join((parts[1], parts[2], parts[3], parts[4]))


def _build_legend_text(lenfv, lennan, lenev, lengr, global_min, global_max,
                       lenzero, n_grouped, n_visual, n_std, inpercentile,
                       zcell_size, zdbar, n_zdbar):
    """Build the one-element legend tuple summarizing how much data was removed.

    The grouped-data line describes the method that was actually applied:
    standard deviations when ``n_std`` is truthy, percentiles otherwise.
    The water-depth line is appended only when ``zdbar`` is truthy.
    """
    text = (
        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
        '{} unreasonable values'.format(lenfv, lennan, lenev, lengr,
                                        global_min, global_max, lenzero))
    if n_std:
        text += (
            '\nremoved {} data points +/- {} SD of data grouped in {} dbar segments'
            .format(n_grouped, n_std, zcell_size))
    else:
        text += (
            '\nremoved {} in the upper and lower {} percentile of data grouped in {} dbar segments'
            .format(n_grouped, inpercentile, zcell_size))
    text += (
        '\nexcluded {} suspect data points when inspected visually'
        .format(n_visual))
    if zdbar:
        text += (
            '\nexcluded {} suspect data in water depth greater than {} dbar'
            .format(n_zdbar, zdbar))
    # ax.legend() receives a sequence of labels, hence the 1-tuple.
    return (text, )


def _process_dataset(ds, r, sDir, subsite, refdes, method, deployment,
                     sci_vars, start_time, end_time, zdbar, n_std,
                     inpercentile, zcell_size):
    """Filter one opened dataset and save profile/xsection plots per variable.

    ``ds`` must stay open for the duration of the call. For every science
    variable (names containing 'pressure' are skipped) the data are passed
    through the project's rejection helpers, the excluded timestamps are
    appended to a CSV file, and plots are saved only when the grouped-data
    filter changed the number of points.
    """
    array = subsite[0:2]
    ds = ds.swap_dims({'obs': 'time'})
    refdes_dir = os.path.join(sDir, array, subsite, refdes)

    if start_time is not None and end_time is not None:
        ds = ds.sel(time=slice(start_time, end_time))
        if len(ds['time'].values) == 0:
            print(
                'No data to plot for specified time range: ({} to {})'.
                format(start_time, end_time))
            return
        ext = start_time.strftime('%Y-%m-%d') + 'to' + end_time.strftime(
            '%Y-%m-%d')
        subdirs = (deployment, ext)
    else:
        subdirs = (deployment, )

    save_dir_profile = os.path.join(refdes_dir, 'profile_plots', *subdirs)
    save_dir_xsection = os.path.join(refdes_dir, 'xsection_plots', *subdirs)

    texclude_dir = os.path.join(refdes_dir, 'time_to_exclude')
    cf.create_dir(texclude_dir)

    time1 = ds['time'].values
    try:
        ds_lat1 = ds['lat'].values
    except KeyError:
        ds_lat1 = None
        print('No latitude variable in file')
    try:
        ds_lon1 = ds['lon'].values
    except KeyError:
        ds_lon1 = None
        print('No longitude variable in file')

    # get pressure variable
    pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(
        ds)

    # (Re)create the CSV listing timestamps with suspect data; one row per
    # data parameter is appended below.
    file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(
        texclude_dir, deployment, refdes, method)
    pd.DataFrame(columns=['deployments', 'time_to_exclude']).to_csv(
        file_exclude, index=True)

    # Lower bounds used to drop zero / unreasonable values per variable.
    min_valid = {'salinity': 30, 'density': 1022.5, 'conductivity': 3.45}

    # loop through sensor-data parameters
    for sv in sci_vars:
        print(sv)
        if 'pressure' in sv:
            continue

        z1 = ds[sv].values
        fv = ds[sv]._FillValue
        sv_units = ds[sv].units

        if np.isnan(z1).all():
            print('Array of all NaNs - skipping plot.')
            continue
        if not np.any(z1 != fv):
            print('Array of all fill values - skipping plot.')
            continue

        # remove unreasonable pressure data (e.g. for surface piercing profilers)
        if zdbar:
            po_ind = (0 < y1) & (y1 < zdbar)
            n_zdbar = np.sum(~po_ind)
            tm = time1[po_ind]
            y = y1[po_ind]
            z = z1[po_ind]
            # lat/lon are None when the file has no such variable
            ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
            ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
            print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))
        else:
            n_zdbar = None
            tm = time1
            y = y1
            z = z1
            ds_lat = ds_lat1
            ds_lon = ds_lon1

        # reject erroneous data
        dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
            cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

        # get rid of 0.0 data
        ind = ndata > min_valid.get(sv, 0)
        lenzero = np.sum(~ind)
        dtime = dtime[ind]
        zpressure = zpressure[ind]
        ndata = ndata[ind]
        if ds_lat is not None and ds_lon is not None:
            lat = lat[ind]
            lon = lon[ind]
        else:
            lat = None
            lon = None

        if len(dtime) == 0:
            continue

        # reject time range from data portal file export
        t_portal, z_portal, y_portal, lat_portal, lon_portal = \
            cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

        print(
            'removed {} data points using visual inspection of data'
            .format(len(ndata) - len(z_portal)))

        if len(y_portal) == 0:
            continue

        # create data groups
        columns = ['tsec', 'dbar', str(sv)]
        min_r = int(round(min(y_portal) - zcell_size))
        max_r = int(round(max(y_portal) + zcell_size))
        ranges = list(range(min_r, max_r, zcell_size))

        groups, d_groups = gt.group_by_depth_range(t_portal, y_portal,
                                                   z_portal, columns, ranges)

        # 'scatter' variables use the percentile method. A per-variable local
        # is used so the caller's n_std still applies to the other variables.
        sv_n_std = None if 'scatter' in sv else n_std

        # identifying timestamps from percentile analysis
        y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
            groups, d_groups, sv_n_std, inpercentile)

        if len(time_ex) > 0:
            # writing timestamps to .csv file to use with data_range.py script
            t_exclude = time_ex[0]
            for i in range(1, len(time_ex)):
                t_exclude = '{}, {}'.format(t_exclude, time_ex[i])

            stat_data = pd.DataFrame(
                {
                    'deployments': deployment,
                    'time_to_exclude': t_exclude
                },
                index=[sv])
            stat_data.to_csv(file_exclude,
                             index=True,
                             mode='a',
                             header=False)

            # rejecting timestamps from percentile analysis
            t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(
                t_portal, y_portal, z_portal, time_ex)
        else:
            t_nospct = t_portal
            z_nospct = z_portal
            y_nospct = y_portal

        # Plot only when some data remain and the filters removed something.
        if len(t_nospct) == 0 or len(t_nospct) == len(dtime):
            continue

        cf.create_dir(save_dir_profile)
        cf.create_dir(save_dir_xsection)
        sname = '-'.join((r, method, sv))
        first_time = pd.to_datetime(t_nospct.min())
        last_time = pd.to_datetime(t_nospct.max())
        sfile = '_'.join(
            ('rm_suspect_data', sname, first_time.strftime('%Y%m%d')))

        t0 = first_time.strftime('%Y-%m-%dT%H:%M:%S')
        t1 = last_time.strftime('%Y-%m-%dT%H:%M:%S')
        title = ' '.join(
            (deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

        leg_text = _build_legend_text(
            lenfv, lennan, lenev, lengr, global_min, global_max, lenzero,
            len(z_portal) - len(z_nospct),
            len(ndata) - len(z_portal), sv_n_std, inpercentile, zcell_size,
            zdbar, n_zdbar)

        # profile plot
        xlabel = sv + " (" + sv_units + ")"
        ylabel = press[0] + " (" + y_units[0] + ")"
        clabel = 'Time'

        # plot non-erroneous data
        print('plotting profile')
        fig, ax = pf.plot_profiles(z_nospct,
                                   y_nospct,
                                   t_nospct,
                                   ylabel,
                                   xlabel,
                                   clabel,
                                   stdev=None)

        ax.set_title(title, fontsize=9)
        ax.plot(n_avg, y_avg, '-k')
        ax.legend(leg_text,
                  loc='upper center',
                  bbox_to_anchor=(0.5, -0.17),
                  fontsize=6)
        fig.tight_layout()
        pf.save_fig(save_dir_profile, sfile)

        # xsection plot
        print('plotting xsection')
        clabel = sv + " (" + sv_units + ")"
        ylabel = press[0] + " (" + y_units[0] + ")"

        # Bathymetry from glider engineering files is not used here:
        # t_eng and m_water_depth are always passed as None.
        fig, ax, bar = pf.plot_xsection(subsite,
                                        t_nospct,
                                        y_nospct,
                                        z_nospct,
                                        clabel,
                                        ylabel,
                                        t_eng=None,
                                        m_water_depth=None,
                                        inpercentile=inpercentile,
                                        stdev=None)

        # NOTE(review): a sibling script guards this call with `if fig:`,
        # which suggests plot_xsection may return no figure - confirm.
        if fig:
            ax.set_title(title, fontsize=9)
            ax.legend(leg_text,
                      loc='upper center',
                      bbox_to_anchor=(0.5, -0.17),
                      fontsize=6)
            fig.tight_layout()
            pf.save_fig(save_dir_xsection, sfile)


def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         zdbar, n_std, inpercentile, zcell_size):
    """Plot profiles and cross-sections after removing suspect data.

    Relies on the module-level names ``cf``, ``pf``, ``gt``, ``xr``, ``pd``,
    ``np`` and ``os`` being imported by the enclosing file.

    Args:
        url_list: URLs whose second-to-last path component encodes the
            reference designator. Designators containing 'ENG' or 'ADCP'
            are ignored.
        sDir: Root directory under which plots and CSV files are saved.
        deployment_num: If not None, only datasets whose deployment string
            ends in this number (last four characters) are processed.
        start_time: Start of the time slice (must support ``strftime``).
            Used only when ``end_time`` is also given.
        end_time: End of the time slice (must support ``strftime``).
        preferred_only: ``'yes'`` restricts processing to the preferred
            streams reported by ``cf.get_preferred_stream_info``.
        zdbar: If truthy, only samples with 0 < pressure < zdbar are kept.
        n_std: Passed to ``cf.reject_timestamps_in_groups``. Replaced by None
            for variables whose name contains 'scatter', so those use the
            percentile method.
        inpercentile: Passed to ``cf.reject_timestamps_in_groups`` and
            ``pf.plot_xsection``.
        zcell_size: Integer bin size used to group the data by pressure.
    """
    rd_list = []
    for uu in url_list:
        rd = _refdes_from_url(uu)
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            if _refdes_from_url(u) == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string entry for this stream slot
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                fd)

            # Compare by value: an identity test (`is not`) on ints is only
            # accidentally correct for small cached integers.
            if (deployment_num is not None
                    and int(deployment[-4:]) != int(deployment_num)):
                continue

            sci_vars = cf.return_science_vars(stream)

            # The context manager closes the file once this dataset is done.
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                _process_dataset(ds, r, sDir, subsite, refdes, method,
                                 deployment, sci_vars, start_time, end_time,
                                 zdbar, n_std, inpercentile, zcell_size)

Example #6

Show file

File: plot_all_depth_profiles.py Project: ooi-data-lab/data-review-tools

def main(url_list, sDir, plot_type):
    """""
    URL : path to instrument data by methods
    sDir : path to the directory on your machine to save files
    plot_type: folder name for a plot type

    """ ""
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)
    ''' 
    separate different instruments
    '''
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments and other streams chat
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)

        datasets = list(itertools.chain(*datasets))
        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)
        '''
        separate the data files by methods
        '''
        for ms in ms_list:  # np.unique(methodstream)
            fdatasets_sel = [x for x in fdatasets if ms in x]

            # create a folder to save figures
            save_dir = os.path.join(sDir, array, subsite, r, plot_type,
                                    ms.split('-')[0])
            cf.create_dir(save_dir)

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for x in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((x['method'], x['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for y in x['stream']['parameters']:
                        if y['data_product_type'] == 'Science Data':
                            sci_vars.update(
                                {y['name']: dict(db_units=y['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict,
                                                       ms)

            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                ds = xr.open_dataset(fd, mask_and_scale=False)
                print('\nAppending data file: {}'.format(fd.split('/')[-1]))
                for var in list(sci_vars_dict[ms]['vars'].keys()):
                    sh = sci_vars_dict[ms]['vars'][var]
                    if ds[var].units == sh['db_units']:
                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)

                        # time
                        t = ds['time'].values
                        t0 = pd.to_datetime(
                            t.min()).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(
                            t.max()).strftime('%Y-%m-%dT%H:%M:%S')

                        # sci variable
                        z = ds[var].values
                        sh['t'] = np.append(sh['t'], t)
                        sh['values'] = np.append(sh['values'], z)

                        # add pressure to dictionary of sci vars
                        if 'MOAS' in subsite:
                            if 'CTD' in main_sensor:  # for glider CTDs, pressure is a coordinate
                                pressure = 'sci_water_pressure_dbar'
                                y = ds[pressure].values
                                if ds[pressure].units not in y_unit:
                                    y_unit.append(ds[pressure].units)
                                if ds[pressure].long_name not in y_name:
                                    y_name.append(ds[pressure].long_name)
                            else:
                                pressure = 'int_ctd_pressure'
                                y = ds[pressure].values
                                if ds[pressure].units not in y_unit:
                                    y_unit.append(ds[pressure].units)
                                if ds[pressure].long_name not in y_name:
                                    y_name.append(ds[pressure].long_name)
                        else:
                            pressure = pf.pressure_var(ds, ds.data_vars.keys())
                            y = ds[pressure].values
                            if ds[pressure].units not in y_unit:
                                y_unit.append(ds[pressure].units)
                            if ds[pressure].long_name not in y_name:
                                y_name.append(ds[pressure].long_name)

                        sh['pressure'] = np.append(sh['pressure'], y)

            if len(y_unit) != 1:
                print('pressure unit varies!')
            else:
                y_unit = y_unit[0]

            if len(y_name) != 1:
                print('pressure long name varies!')
            else:
                y_name = y_name[0]

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print('\nWorking on variable: {}'.format(sv))
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                    else:
                        sv_units = vinfo['units'][0]
                        fv = vinfo['fv'][0]
                        t0 = pd.to_datetime(min(
                            vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(max(
                            vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                        t = vinfo['t']
                        x = vinfo['values']
                        y = vinfo['pressure']

                    # Check if the array is all NaNs
                    if sum(np.isnan(x)) == len(x):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    elif len(x[x != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    else:
                        # reject fill values
                        fv_ind = x != fv
                        y_nofv = y[fv_ind]
                        t_nofv = t[fv_ind]
                        c_nofv = cm.rainbow(np.linspace(0, 1, len(t[fv_ind])))
                        x_nofv = x[fv_ind]
                        print(len(x) - len(fv_ind), ' fill values')

                        # reject NaNs
                        nan_ind = ~np.isnan(x)
                        t_nofv_nonan = t_nofv[nan_ind]
                        c_nofv_nonan = c_nofv[nan_ind]
                        y_nofv_nonan = y_nofv[nan_ind]
                        x_nofv_nonan = x_nofv[nan_ind]
                        print(len(x) - len(nan_ind), ' NaNs')

                        # reject extreme values
                        ev_ind = cf.reject_extreme_values(x_nofv_nonan)
                        t_nofv_nonan_noev = t_nofv_nonan[ev_ind]
                        c_nofv_nonan_noev = c_nofv_nonan[ev_ind]
                        y_nofv_nonan_noev = y_nofv_nonan[ev_ind]
                        x_nofv_nonan_noev = x_nofv_nonan[ev_ind]
                        print(len(z) - len(ev_ind), ' Extreme Values', '|1e7|')

                        # reject values outside global ranges:
                        global_min, global_max = cf.get_global_ranges(r, sv)
                        # platform not in qc-table (parad_k_par)
                        # global_min = 0
                        # global_max = 2500
                        print('global ranges for : {}-{}  {} - {}'.format(
                            r, sv, global_min, global_max))
                        if isinstance(global_min, (int, float)) and isinstance(
                                global_max, (int, float)):
                            gr_ind = cf.reject_global_ranges(
                                x_nofv_nonan_noev, global_min, global_max)
                            t_nofv_nonan_noev_nogr = t_nofv_nonan_noev[gr_ind]
                            y_nofv_nonan_noev_nogr = y_nofv_nonan_noev[gr_ind]
                            x_nofv_nonan_noev_nogr = x_nofv_nonan_noev[gr_ind]
                        else:
                            t_nofv_nonan_noev_nogr = t_nofv_nonan_noev
                            y_nofv_nonan_noev_nogr = y_nofv_nonan_noev
                            x_nofv_nonan_noev_nogr = x_nofv_nonan_noev

                    if len(x_nofv_nonan_noev) > 0:
                        if m == 'common_stream_placeholder':
                            sname = '-'.join((r, sv))
                        else:
                            sname = '-'.join((r, m, sv))

                    if sv != 'pressure':
                        columns = ['tsec', 'dbar', str(sv)]
                        bin_size = 10
                        min_r = int(round(min(y_nofv_nonan_noev) - bin_size))
                        max_r = int(round(max(y_nofv_nonan_noev) + bin_size))
                        ranges = list(range(min_r, max_r, bin_size))
                        groups, d_groups = gt.group_by_depth_range(
                            t_nofv_nonan_noev_nogr, y_nofv_nonan_noev_nogr,
                            x_nofv_nonan_noev_nogr, columns, ranges)

                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr = [], [], [], [], [], [], []
                    tm = 1
                    for ii in range(len(groups)):
                        nan_ind = d_groups[ii + tm].notnull()
                        xtime = d_groups[ii + tm][nan_ind]
                        colors = cm.rainbow(np.linspace(0, 1, len(xtime)))
                        ypres = d_groups[ii + tm + 1][nan_ind]
                        nval = d_groups[ii + tm + 2][nan_ind]
                        tm += 2

                        l_arr.append(len(
                            nval))  # count of data to filter out small groups
                        y_avg.append(ypres.mean())
                        n_avg.append(nval.mean())
                        n_min.append(nval.min())
                        n_max.append(nval.max())
                        n_std = 3
                        n0_std.append(nval.mean() + n_std * nval.std())
                        n1_std.append(nval.mean() - n_std * nval.std())

                    # Plot all data
                    ylabel = y_name + " (" + y_unit + ")"
                    xlabel = sv + " (" + sv_units + ")"
                    clabel = 'Time'

                    fig, ax = pf.plot_profiles(x_nofv_nonan_noev_nogr,
                                               y_nofv_nonan_noev_nogr,
                                               t_nofv_nonan_noev_nogr,
                                               ylabel,
                                               xlabel,
                                               clabel,
                                               end_times,
                                               deployments,
                                               stdev=None)

                    title_text = ' '.join((r, ms.split('-')[-1])) + '\n' \
                                 + t0 + ' - ' + t1 + '\n' + str(bin_size) +\
                                 ' m average and ' + str(n_std) + ' std shown'

                    ax.set_title(title_text, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')

                    ax.fill_betweenx(y_avg,
                                     n0_std,
                                     n1_std,
                                     color='m',
                                     alpha=0.2)
                    pf.save_fig(save_dir, sname)

                    # Plot data with outliers removed

                    fig, ax = pf.plot_profiles(x_nofv_nonan_noev_nogr,
                                               y_nofv_nonan_noev_nogr,
                                               t_nofv_nonan_noev_nogr,
                                               ylabel,
                                               xlabel,
                                               clabel,
                                               end_times,
                                               deployments,
                                               stdev=5)
                    ax.set_title(' '.join((r, ms.split('-')[-1])) + '\n' \
                                 + t0 + ' - ' + t1, fontsize=9)
                    sfile = '_'.join((sname, 'rmoutliers'))
                    pf.save_fig(save_dir, sfile)

Example #7

Show file

File: plot_deployment_depth_profiles.py Project: ooi-data-lab/data-review-tools

def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, method_num, zdbar, n_std, inpercentile, zcell_size):
    """Plot per-deployment depth profiles after successive data-rejection steps.

    For every URL in ``url_list`` the science variables listed in the data
    review record for the URL's method-stream are collected from each
    selected dataset, passed through ``cf.reject_erroneous_data``, grouped
    into ``zcell_size`` pressure segments and plotted with
    ``pf.plot_profiles``.  Up to four figures are saved per variable:

    * ``rm_erroneous_data_*``  - output of ``cf.reject_erroneous_data``
    * ``rm_suspect_data_*``    - after ``cf.reject_timestamps_in_groups``
    * ``rm_v_suspect_data_*``  - after ``cf.reject_timestamps_dataportal``
    * ``rm_depth_range_*``     - after dropping samples at or below ``zdbar``

    Timestamps returned by ``cf.reject_timestamps_in_groups`` are appended to
    ``<deployment>_<refdes>_<method>_excluded_timestamps.csv`` inside the
    ``time_to_exclude`` directory of the reference designator.

    Args:
        url_list: URLs whose second-to-last path segment is a dash-separated
            name; elements 1-4 of it form the reference designator.
        sDir: Root directory under which plots and CSV files are written.
        plot_type: Folder name inserted into the plot save path.
        deployment_num: If not None, only datasets whose deployment number
            equals this integer are processed.
        start_time: Start of the time slice; must support ``strftime``.
            The slice is applied only when ``end_time`` is also given.
        end_time: End of the time slice (see ``start_time``).
        method_num: If not None, datasets whose method differs are skipped.
        zdbar: If not None, samples with pressure >= ``zdbar`` are removed
            for the final plot.
        n_std: Forwarded to ``cf.reject_timestamps_in_groups``; ``None`` is
            forwarded instead for variables whose name contains 'scatter'.
        inpercentile: Forwarded to ``cf.reject_timestamps_in_groups`` and
            reported in the printed/legend text.
        zcell_size: Integer step (used with ``range``) for the depth bins.
    """
    for i, u in enumerate(url_list):
        print('\nUrl {} of {}: {}'.format(i + 1, len(url_list), u))
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        # read URL to get data
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        # get sci data review list
        dr_data = cf.refdes_datareview_json(r)

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = cf.get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # create a dictionary for science variables from analysis file
        stream_sci_vars_dict = dict()
        for x in dr_data['instrument']['data_streams']:
            dr_ms = '-'.join((x['method'], x['stream_name']))
            if ms == dr_ms:
                stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                sci_vars = dict()
                for y in x['stream']['parameters']:
                    if y['data_product_type'] == 'Science Data':
                        sci_vars.update({y['name']: dict(db_units=y['unit'])})
                if len(sci_vars) > 0:
                    stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

        for ii, d in enumerate(datasets_sel):
            part_d = d.split('/')[-1]
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel), part_d))
            # NOTE(review): ds is used after the context manager has closed the
            # file; this presumably relies on xarray reopening it lazily - confirm.
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)

            if method_num is not None and method != method_num:
                print(method_num, method)
                continue

            if deployment_num is not None:
                # Compare by value (not identity) and parse the trailing four
                # digits, as done for the preferred-stream deployments above;
                # splitting on '0' breaks for numbers that end in zero.
                deploy_n = int(deployment[-4:])
                if deploy_n != deployment_num:
                    print(type(deploy_n), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0], deployment, ext)
            else:
                save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, ms.split('-')[0], deployment)

            cf.create_dir(save_dir)

            texclude_dir = os.path.join(sDir, array, subsite, refdes, 'time_to_exclude')
            cf.create_dir(texclude_dir)

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)

            for var in list(sci_vars_dict[ms]['vars'].keys()):
                sh = sci_vars_dict[ms]['vars'][var]
                if ds[var].units == sh['db_units']:
                    if ds[var]._FillValue not in sh['fv']:
                        sh['fv'].append(ds[var]._FillValue)
                    if ds[var].units not in sh['units']:
                        sh['units'].append(ds[var].units)

                    sh['t'] = np.append(sh['t'], ds['time'].values)
                    sh['values'] = np.append(sh['values'], ds[var].values)

                    y, y_unit, y_name = cf.add_pressure_to_dictionary_of_sci_vars(ds)
                    sh['pressure'] = np.append(sh['pressure'], y)

            # start a fresh exclusion file (header only); rows are appended below
            stat_data = pd.DataFrame(columns=['deployments', 'time_to_exclude'])
            file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(texclude_dir,
                                                                                   deployment, refdes, method)
            stat_data.to_csv(file_exclude, index=True)

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        # Nothing was appended for this variable; skip it so the
                        # code below never runs on values left from another one.
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t = vinfo['t']
                    z = vinfo['values']
                    y = vinfo['pressure']

                    # Check if the array is all NaNs
                    if np.isnan(z).all():
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                        cf.reject_erroneous_data(r, sv, t, y, z, fv)

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(min(zpressure) - zcell_size))
                    max_r = int(round(max(zpressure) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges)

                    # 'scatter' variables use the percentile instead of n_std.
                    # Keep this per-variable so the caller's n_std is not lost
                    # for the variables that follow.
                    sv_n_std = None if 'scatter' in sv else n_std

                    # rejecting timestamps from percentile analysis
                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex, \
                        t_nospct, z_nospct, y_nospct = cf.reject_timestamps_in_groups(groups, d_groups, sv_n_std,
                                                                                      dtime, zpressure, ndata,
                                                                                      inpercentile)
                    print('{} using {} percentile of data grouped in {} dbar segments'.format(
                        len(zpressure) - len(z_nospct), inpercentile, zcell_size))

                    # writing timestamps to .csv file to use with data_range.py script
                    if len(time_ex) != 0:
                        t_exclude = time_ex[0]
                        for k in range(1, len(time_ex)):
                            t_exclude = '{}, {}'.format(t_exclude, time_ex[k])

                        stat_data = pd.DataFrame({'deployments': deployment,
                                                  'time_to_exclude': t_exclude}, index=[sv])
                        stat_data.to_csv(file_exclude, index=True, mode='a', header=False)

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r,
                                                                                   t_nospct, z_nospct, y_nospct)
                    print('{} using visual inspection of data'.format(len(z_nospct) - len(z_portal)))

                    # reject data in a depth range
                    if zdbar is not None:
                        y_ind = y_portal < zdbar
                        t_array = t_portal[y_ind]
                        y_array = y_portal[y_ind]
                        z_array = z_portal[y_ind]
                    else:
                        t_array = t_portal
                        y_array = y_portal
                        z_array = z_portal
                    # number of samples actually dropped by the depth filter
                    n_deep = len(y_portal) - len(y_array)
                    print('{} in water depth > {} dbar'.format(n_deep, zdbar))

                    # Plot data
                    if m == 'common_stream_placeholder':
                        sname = '-'.join((sv, r))
                    else:
                        sname = '-'.join((sv, r, m))

                    xlabel = sv + " (" + sv_units + ")"
                    ylabel = y_name[0] + " (" + y_unit[0] + ")"
                    clabel = 'Time'
                    title = ' '.join((deployment, r, m))

                    # plot non-erroneous data
                    fig, ax = pf.plot_profiles(ndata, zpressure, dtime,
                                               ylabel, xlabel, clabel, end_times, deployments, stdev=None)
                    ax.set_title(title, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                    leg_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}]'.format(
                            len(z) - lenfv, len(z) - lennan, len(z) - lenev, lengr, global_min, global_max) + '\n' +
                        ('(black) data average in {} dbar segments'.format(zcell_size)) + '\n' +
                        ('(magenta) upper and lower {} percentile envelope in {} dbar segments'.format(
                            inpercentile, zcell_size)),)
                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    sfile = '_'.join(('rm_erroneous_data', sname))
                    pf.save_fig(save_dir, sfile)

                    # plot excluding time ranges for suspect data
                    if len(z_nospct) != len(zpressure):
                        fig, ax = pf.plot_profiles(z_nospct, y_nospct, t_nospct,
                                                   ylabel, xlabel, clabel, end_times, deployments, stdev=None)

                        ax.set_title(title, fontsize=9)
                        leg_text = (
                            'removed {} in the upper and lower {} percentile of data grouped in {} dbar segments'.format(
                                len(zpressure) - len(z_nospct), inpercentile, zcell_size),)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_suspect_data', sname))
                        pf.save_fig(save_dir, sfile)

                    # plot excluding time ranges from data portal export
                    if len(z_nospct) != len(z_portal):
                        fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal,
                                                   ylabel, xlabel, clabel, end_times, deployments, stdev=None)
                        ax.set_title(title, fontsize=9)
                        leg_text = ('excluded {} suspect data when inspected visually'.format(
                            len(z_nospct) - len(z_portal)),)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_v_suspect_data', sname))
                        pf.save_fig(save_dir, sfile)

                    # Plot excluding a selected depth value (only when the depth
                    # filter actually removed something)
                    if len(z_array) != len(z_portal):
                        fig, ax = pf.plot_profiles(z_array, y_array, t_array,
                                                   ylabel, xlabel, clabel, end_times, deployments, stdev=None)

                        ax.set_title(title, fontsize=9)
                        leg_text = ('excluded {} suspect data in water depth greater than {} dbar'.format(n_deep, zdbar),)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()
                        sfile = '_'.join(('rm_depth_range', sname))
                        pf.save_fig(save_dir, sfile)

Example #8

Show file

File: plot_profiles.py Project: ooi-data-lab/data-review-tools

def main(sDir, f, start_time, end_time):
    """Save two profile plots per raw variable for every dataset listed in a CSV.

    The CSV ``f`` (located in ``sDir``) must have an ``outputUrl`` column.
    Each URL is expanded with ``cf.get_nc_urls`` and filtered with
    ``cf.filter_collocated_instruments``.  For every remaining dataset, each
    raw variable (pressure excluded) is plotted against pressure twice via
    ``pf.plot_profiles``: once with ``stdev=None`` and once with ``stdev=5``
    (file name suffixed with ``rmoutliers``).

    Args:
        sDir: Directory holding ``f``; plots are saved beneath it.
        f: Name of the CSV file with the ``outputUrl`` column.
        start_time: Start of the time slice; applied only when ``end_time``
            is also not None.
        end_time: End of the time slice (see ``start_time``).
    """
    url_table = pd.read_csv(os.path.join(sDir, f))
    url_list = url_table['outputUrl'].tolist()
    n_urls = len(url_list)

    for url_no, u in enumerate(url_list, start=1):
        print('\nUrl {} of {}: {}'.format(url_no, n_urls, u))
        main_sensor = u.split('/')[-2].split('-')[4]
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        for ds_no, d in enumerate(datasets_sel, start=1):
            print('\nDataset {} of {}: {}'.format(ds_no, len(datasets_sel),
                                                  d))
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                # optional restriction to the requested time window
                if not (start_time is None or end_time is None):
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    d)
                var_names = ds.data_vars.keys()

                # for glider CTDs, pressure is a coordinate
                if not ('MOAS' in subsite and 'CTD' in main_sensor):
                    pressure = pf.pressure_var(ds, var_names)
                else:
                    pressure = 'sci_water_pressure_dbar'

                # raw variables to plot, with pressure removed
                raw_vars = [name for name in cf.return_raw_vars(var_names)
                            if name != pressure]

                save_dir = os.path.join(sDir, subsite, refdes, 'profile_plots',
                                        deployment)
                cf.create_dir(save_dir)

                t = ds['time'].values
                time_fmt = '%Y-%m-%dT%H:%M:%S'
                t0 = pd.to_datetime(t.min()).strftime(time_fmt)
                t1 = pd.to_datetime(t.max()).strftime(time_fmt)
                title = ' '.join((deployment, refdes, method))
                full_title = title + '\n' + t0 + ' - ' + t1

                colors = cm.rainbow(np.linspace(0, 1, len(t)))

                y = ds[pressure]

                print('Plotting variables...')
                for var in raw_vars:
                    print(var)
                    x = ds[var]

                    xlabel = var + " (" + x.units + ")"
                    ylabel = pressure + " (" + y.units + ")"
                    file_stem = (fname[0:-46], x.name)

                    # first pass: all data; second pass: outliers removed
                    for stdev, suffix in ((None, ()), (5, ('rmoutliers',))):
                        fig, ax = pf.plot_profiles(x,
                                                   y,
                                                   colors,
                                                   ylabel,
                                                   xlabel,
                                                   stdev=stdev)
                        ax.set_title(full_title, fontsize=9)
                        sfile = '_'.join(file_stem + suffix)
                        pf.save_fig(save_dir, sfile)

Example #9

Show file

def main(url_list, sDir, plot_type, deployment_num, start_time, end_time):
    """""
    URL : path to instrument data by methods
    sDir : path to the directory on your machine to save files
    plot_type: folder name for a plot type

    """ ""
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)
    ''' 
    separate different instruments
    '''
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments and other streams chat
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)

        datasets = list(itertools.chain(*datasets))
        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)
        '''
        separate the data files by methods
        '''
        for ms in ms_list:
            fdatasets_sel = [x for x in fdatasets if ms in x]

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for x in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((x['method'], x['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for y in x['stream']['parameters']:
                        if y['data_product_type'] == 'Science Data':
                            sci_vars.update(
                                {y['name']: dict(db_units=y['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            # initialize an empty data array for science variables in dictionary
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict,
                                                       ms)

            print('\nAppending data from files: {}'.format(ms))
            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                ds = xr.open_dataset(fd, mask_and_scale=False)
                print(fd)

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    fd)

                if deployment_num is not None:
                    if int(deployment.split('0')[-1]) is not deployment_num:
                        print(type(int(deployment.split('0')[-1])),
                              type(deployment_num))
                        continue

                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        plot_type,
                                        ms.split('-')[0], deployment)
                cf.create_dir(save_dir)

                for var in list(sci_vars_dict[ms]['vars'].keys()):
                    sh = sci_vars_dict[ms]['vars'][var]
                    if ds[var].units == sh['db_units']:
                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)

                        # time
                        t = ds['time'].values
                        t0 = pd.to_datetime(
                            t.min()).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(
                            t.max()).strftime('%Y-%m-%dT%H:%M:%S')

                        # sci variable
                        z = ds[var].values
                        sh['t'] = np.append(sh['t'], t)
                        sh['values'] = np.append(sh['values'], z)

                        # add pressure to dictionary of sci vars
                        if 'MOAS' in subsite:
                            if 'CTD' in main_sensor:  # for glider CTDs, pressure is a coordinate
                                pressure = 'sci_water_pressure_dbar'
                                y = ds[pressure].values
                                if ds[pressure].units not in y_unit:
                                    y_unit.append(ds[pressure].units)
                                if ds[pressure].long_name not in y_name:
                                    y_name.append(ds[pressure].long_name)
                            else:
                                pressure = 'int_ctd_pressure'
                                y = ds[pressure].values
                                if ds[pressure].units not in y_unit:
                                    y_unit.append(ds[pressure].units)
                                if ds[pressure].long_name not in y_name:
                                    y_name.append(ds[pressure].long_name)
                        else:
                            pressure = pf.pressure_var(ds, ds.data_vars.keys())
                            y = ds[pressure].values
                            if ds[pressure].units not in y_unit:
                                y_unit.append(ds[pressure].units)
                            if ds[pressure].long_name not in y_name:
                                y_name.append(ds[pressure].long_name)

                        sh['pressure'] = np.append(sh['pressure'], y)

                if len(y_unit) != 1:
                    print('pressure unit varies UHHHHHHHHH')
                else:
                    y_unit = y_unit[0]

                if len(y_name) != 1:
                    print('pressure long name varies UHHHHHHHHH')
                else:
                    y_name = y_name[0]

                for m, n in sci_vars_dict.items():
                    for sv, vinfo in n['vars'].items():
                        print(sv)
                        if len(vinfo['t']) < 1:
                            print('no variable data to plot')
                        else:
                            sv_units = vinfo['units'][0]
                            fv = vinfo['fv'][0]
                            t0 = pd.to_datetime(min(
                                vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                            t1 = pd.to_datetime(max(
                                vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                            t = vinfo['t']
                            z = vinfo['values']
                            y = vinfo['pressure']

                            title = ' '.join((r, ms.split('-')[1]))

                        # Check if the array is all NaNs
                        if sum(np.isnan(z)) == len(z):
                            print('Array of all NaNs - skipping plot.')

                        # Check if the array is all fill values
                        elif len(z[z != fv]) == 0:
                            print('Array of all fill values - skipping plot.')

                        else:
                            # reject fill values
                            fv_ind = z != fv
                            y_nofv = y[fv_ind]
                            t_nofv = t[fv_ind]
                            z_nofv = z[fv_ind]
                            print(len(z) - len(fv_ind), ' fill values')

                            # reject NaNs
                            nan_ind = ~np.isnan(z)
                            t_nofv_nonan = t_nofv[nan_ind]
                            y_nofv_nonan = y_nofv[nan_ind]
                            z_nofv_nonan = z_nofv[nan_ind]
                            print(len(z) - len(nan_ind), ' NaNs')

                            # reject extreme values
                            ev_ind = cf.reject_extreme_values(z_nofv_nonan)
                            t_nofv_nonan_noev = t_nofv_nonan[ev_ind]
                            colors = cm.rainbow(
                                np.linspace(0, 1, len(t_nofv_nonan_noev)))
                            y_nofv_nonan_noev = y_nofv_nonan[ev_ind]
                            z_nofv_nonan_noev = z_nofv_nonan[ev_ind]
                            print(
                                len(z) - len(ev_ind), ' Extreme Values',
                                '|1e7|')

                        if len(y_nofv_nonan_noev) > 0:
                            if m == 'common_stream_placeholder':
                                sname = '-'.join((r, sv))
                            else:
                                sname = '-'.join((r, m, sv))
                        # Plot all data
                        ylabel = y_name + " (" + y_unit + ")"
                        xlabel = sv + " (" + sv_units + ")"
                        clabel = 'Time'
                        clabel = sv + " (" + sv_units + ")"

                        fig, ax = pf.plot_profiles(z_nofv_nonan_noev,
                                                   y_nofv_nonan_noev,
                                                   colors,
                                                   xlabel,
                                                   ylabel,
                                                   stdev=None)
                        ax.set_title((
                            title + '\n' + str(deployment_num) + ': ' + t0 +
                            ' - ' + t1 + '\n' +
                            'used bin = 2 dbar to calculate an average profile (black line) and 3-STD envelope (shaded area)'
                        ),
                                     fontsize=9)

                        # group by depth range
                        columns = ['time', 'pressure', str(sv)]
                        # ranges = [0, 50, 100, 200, 400, 600]
                        ranges = list(
                            range(int(round(min(y_nofv_nonan_noev))),
                                  int(round(max(y_nofv_nonan_noev))), 1))
                        groups, d_groups = gt.group_by_depth_range(
                            t_nofv_nonan_noev, y_nofv_nonan_noev,
                            z_nofv_nonan_noev, columns, ranges)

                        # describe_file = '_'.join((sname, 'statistics.csv'))
                        # # groups.describe().to_csv(save_dir + '/' + describe_file)
                        ind = groups.describe()[sv]['mean'].notnull()
                        groups.describe()[sv][ind].to_csv(
                            '{}/{}_statistics.csv'.format(save_dir, sname),
                            index=True)

                        tm = 1
                        fig, ax = pyplot.subplots(nrows=2, ncols=1)
                        pyplot.margins(y=.08, x=.02)
                        pyplot.grid()
                        y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr = [], [], [], [], [], [], []

                        for ii in range(len(groups)):

                            nan_ind = d_groups[ii + tm].notnull()
                            xtime = d_groups[ii + tm][nan_ind]
                            colors = cm.rainbow(np.linspace(0, 1, len(xtime)))
                            ypres = d_groups[ii + tm + 1][nan_ind]
                            nval = d_groups[ii + tm + 2][nan_ind]
                            tm += 2

                            # fig, ax = pf.plot_xsection(subsite, xtime, ypres, nval, clabel, ylabel, stdev=None)
                            # ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)

                            # pf.plot_profiles(nval, ypres, colors, ylabel, clabel, stdev=None)
                            # ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)

                            ind2 = cf.reject_outliers(nval, 5)
                            xD = nval[ind2]
                            yD = ypres[ind2]
                            nZ = colors[ind2]
                            outliers = str(len(nval) - len(xD))
                            leg_text = ('removed {} outliers (SD={})'.format(
                                outliers, stdev), )

                            ax.scatter(xD, yD, c=nZ, s=2, edgecolor='None')
                            ax.invert_yaxis()
                            ax.set_xlabel(clabel, fontsize=9)
                            ax.set_ylabel(ylabel, fontsize=9)
                            ax.legend(leg_text, loc='best', fontsize=6)
                            ax.set_title((title + '\n' + t0 + ' - ' + t1),
                                         fontsize=9)

                            l_arr.append(
                                len(nval)
                            )  #  count of data to filter out small groups
                            y_avg.append(ypres.mean())
                            n_avg.append(nval.mean())
                            n_min.append(nval.min())
                            n_max.append(nval.max())
                            n0_std.append(nval.mean() + 3 * nval.std())
                            n1_std.append(nval.mean() - 3 * nval.std())

                        ax.plot(n_avg, y_avg, '-k')
                        # ax.plot(n_min, y_avg, '-b')
                        # ax.plot(n_max, y_avg, '-b')
                        ax.fill_betweenx(y_avg,
                                         n0_std,
                                         n1_std,
                                         color='m',
                                         alpha=0.2)
                        sfile = '_'.join((sname, 'statistics'))
                        pf.save_fig(save_dir, sfile)