def plot_ctdmo(data_dict, var, stdev=None):
    """Scatter-plot one variable for every entry of ``data_dict`` and save it.

    Each entry is drawn in its own color on a shared figure. When ``var`` is
    ``'ctdmo_seawater_pressure'`` or ``'density'`` a secondary y-axis is
    added whose ticks sit at each entry's ``'median'`` value and are labelled
    with the last ``-``-separated element of the entry's key.

    The title, y-axis units, file name and output directory are all derived
    from the LAST entry of ``data_dict``. The figure is written with
    ``pf.save_fig`` to ``<sDir>/<subsite>/<first element of dms>``; ``sDir``
    is read from the enclosing module scope, it is not a parameter.

    :param data_dict: mapping of reference designator -> dict providing the
        keys ``'time'``, ``'yD'``, ``'median'``, ``'dms'`` and ``'yunits'``.
    :param var: variable name, used for the y-axis label and the file name.
    :param stdev: when not None, forwarded to ``cf.reject_outliers`` and the
        returned index is applied to both ``'time'`` and ``'yD'``.
    :return: None
    """
    colors10 = [
        'red', 'firebrick', 'orange', 'mediumseagreen', 'blue', 'darkgreen',
        'purple', 'indigo', 'slategray', 'black'
    ]

    colors16 = [
        'red', 'firebrick', 'orange', 'gold', 'mediumseagreen', 'darkcyan',
        'blue', 'darkgreen', 'purple', 'lightgray', 'slategray', 'black',
        'coral', 'gold', 'limegreen', 'midnightblue'
    ]

    fig, ax1 = plt.subplots()
    if not data_dict:
        # Nothing to draw, and no entry to take the title/file name from.
        return

    # The palette depends only on the number of datasets, so pick it once.
    colors = colors10 if len(data_dict) < 11 else colors16

    sensor_list = []
    median_list = []
    for i, (key, value) in enumerate(data_dict.items()):
        t = value['time']
        y = value['yD']
        if stdev is not None:
            ind = cf.reject_outliers(value['yD'], stdev)
            t = t[ind]
            y = y[ind]

        refdes = str(key)
        sensor_list.append(refdes.split('-')[-1])
        median_list.append(value['median'])

        # Wrap around the palette instead of raising IndexError when there
        # are more datasets than colors.
        plt.scatter(t, y, c=colors[i % len(colors)], marker='.', s=.5)

    # From here on ``refdes`` and ``value`` refer to the last dataset plotted.
    plt.grid()
    plt.margins(y=.05, x=.05)

    # refdes on secondary y-axis only for pressure and density
    if var in ['ctdmo_seawater_pressure', 'density']:
        ax2 = ax1.twinx()
        ax2.set_ylim(ax1.get_ylim())
        # twinx() makes ax2 the current axes, so these ticks land on ax2
        plt.yticks(median_list, sensor_list, fontsize=7.5)
        plt.subplots_adjust(right=.85)

    pf.format_date_axis(ax1, fig)
    pf.y_axis_disable_offset(ax1)

    subsite = refdes.split('-')[0]
    dms = value['dms']
    dms_parts = dms.split('-')
    title = subsite + ' ' + '-'.join((dms_parts[0], dms_parts[1]))
    ax1.set_ylabel((var + " (" + value['yunits'] + ")"), fontsize=9)
    ax1.set_title(title, fontsize=10)

    fname = '-'.join((subsite, dms, var))
    if stdev is not None:
        fname = '-'.join((fname, 'outliers_rejected'))
    sdir = os.path.join(sDir, subsite, dms_parts[0])
    cf.create_dir(sdir)
    pf.save_fig(sdir, fname)
Ejemplo n.º 2
0
def main(files, out):
    """
    Plot northward and eastward ADCP seawater velocity against time and bin
    depth and save the figure under <out>/<subsite>/<node>/<stream>/pcolor.

    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    _, ext = os.path.splitext(files)
    # Compare the extension exactly. A substring test such as
    # ``ext in '.nc'`` is also true for '' (no extension), '.' or '.n', which
    # would hand a link-list file straight to xarray instead of read_file().
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    # NOTE(review): the result is not used below; the call is kept in case
    # loading the dictionary has side effects - confirm and remove.
    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    # the engine that xarray uses can be changed as specified here
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    with xr.open_mfdataset(list_files, engine='netcdf4') as ds_disk:

        # change dimensions from 'obs' to 'time'
        ds_disk = ds_disk.swap_dims({'obs': 'time'})
        title_pre = mk_str(ds_disk.attrs, 't')
        save_pre = mk_str(ds_disk.attrs, 's')
        save_dir = os.path.join(out, ds_disk.subsite, ds_disk.node, ds_disk.stream, 'pcolor')
        cf.create_dir(save_dir)

        bins = ds_disk['bin_depths']
        north = ds_disk['northward_seawater_velocity']
        east = ds_disk['eastward_seawater_velocity']

        # Each input to pf.adcp is a dict holding the data plus label/units
        # metadata; every array except time is transposed before plotting.
        time = dict(data=ds_disk['time'].data, info=dict(label=ds_disk['time'].standard_name, units='GMT'))
        bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
        north = dict(data=north.data.T, info=dict(label=north.long_name, units=north.units))
        east = dict(data=east.data.T, info=dict(label=east.long_name, units=east.units))

        sname = save_pre + 'ADCP'
        title = title_pre
        pf.adcp(time, bins, north, east, title)
        pf.resize(width=12, height=8.5)  # Resize figure
        pf.save_fig(save_dir, sname, res=250)  # Save figure
        plt.close('all')
def main(sDir, f):
    """Plot every science variable of each dataset on one multi-axis timeseries.

    Reads the CSV ``<sDir>/<f>``, resolves its ``outputUrl`` column to netCDF
    URLs with ``cf.get_nc_urls`` and, for each dataset, draws all science
    variables of its stream against time. Every variable gets its own
    colored y-axis; additional axes are offset to the right of the plot.
    Outliers are removed per variable with ``cf.reject_outliers(y, 5)``.
    The figure is saved as ``<fname>_timeseries`` in
    ``<sDir>/<subsite>/<refdes>/<deployment>``.

    :param sDir: directory containing the CSV; also the root of the output tree.
    :param f: name of the CSV file, which must have an ``outputUrl`` column.
    :return: None
    """
    ff = pd.read_csv(os.path.join(sDir, f))
    datasets = cf.get_nc_urls(ff['outputUrl'].tolist())
    for d in datasets:
        print(d)
        fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
            d)
        save_dir = os.path.join(sDir, subsite, refdes, deployment)
        cf.create_dir(save_dir)

        sci_vars = cf.return_science_vars(stream)

        # one evenly spaced jet color per variable
        colors = cm.jet(np.linspace(0, 1, len(sci_vars)))

        with xr.open_dataset(d, mask_and_scale=False) as ds:
            ds = ds.swap_dims({'obs': 'time'})
            t = ds['time'].data
            t0 = pd.to_datetime(t.min()).strftime('%Y-%m-%dT%H:%M:%S')
            t1 = pd.to_datetime(t.max()).strftime('%Y-%m-%dT%H:%M:%S')
            title = ' '.join((deployment, refdes, method))

            fig, ax = plt.subplots()
            # twin the x-axis to make independent y-axes: the first variable
            # uses the base axes, every further one gets a twin
            axes = [ax]
            for _ in range(1, len(sci_vars)):
                axes.append(ax.twinx())

            fig.subplots_adjust(right=0.6)
            right_additive = (0.98 - 0.6) / float(5)

            for i, sv in enumerate(sci_vars):
                axis = axes[i]
                if i > 0:
                    # push each extra y-axis further right so they don't overlap
                    axis.spines['right'].set_position(
                        ('axes', 1. + right_additive * i))
                y = ds[sv]

                ind = cf.reject_outliers(y, 5)
                yD = y.data[ind]
                x = t[ind]

                c = colors[i]
                axis.plot(x, yD, '.', markersize=2, color=c)
                axis.set_ylabel((y.name + " (" + y.units + ")"),
                                color=c,
                                fontsize=9)
                axis.tick_params(axis='y', colors=c)

            if len(sci_vars) > 0:
                # format the date axis once, on the last axes drawn
                pf.format_date_axis(axes[-1], fig)

            axes[0].set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
            sfile = '_'.join((fname, 'timeseries'))
            pf.save_fig(save_dir, sfile)
            # One figure is created per dataset; close it so figures do not
            # accumulate across the loop.
            plt.close(fig)
Ejemplo n.º 4
0
def _refdes_from_url(url):
    """Return the reference designator built from elements 1-4 of the URL's second-to-last path segment."""
    parts = url.split('/')[-2].split('-')
    return '-'.join((parts[1], parts[2], parts[3], parts[4]))


def _deployment_number(deployment):
    """Return the integer formed by the trailing digits of a deployment label.

    Splitting the label on '0' is not a safe way to do this: it yields an
    empty string for labels ending in 0 (e.g. 'deployment0010') and drops
    leading digits for labels such as 'deployment0102'.
    """
    digits = deployment[len(deployment.rstrip('0123456789')):]
    return int(digits)


def _glider_water_depth(r, method, deployment, url_list):
    """Return (t_eng, m_water_depth) for a coastal glider, or (None, None).

    Only reference designators containing 'CE05MOAS' or 'CP05MOAS' are
    looked up, and only when exactly one engineering URL matches. Readings
    are kept where m_altimeter_status == 0.
    """
    if 'CE05MOAS' not in r and 'CP05MOAS' not in r:
        return None, None

    eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method,
                    'glider_eng'))
    eng_url = [s for s in url_list if eng in s]
    if len(eng_url) != 1:
        return None, None

    eng_datasets = cf.get_nc_urls(eng_url)
    # filter out collocated datasets
    eng_dataset = [
        j for j in eng_datasets
        if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])
    ]
    if len(eng_dataset) == 0:
        print('No engineering file for deployment {}'.format(deployment))
        return None, None

    # .values loads the arrays into memory, so the file can be closed here
    with xr.open_dataset(eng_dataset[0], mask_and_scale=False) as ds_eng:
        t_eng = ds_eng['time'].values
        m_water_depth = ds_eng['m_water_depth'].values
        # m_altimeter_status = 0 means a good reading (not nan or -1)
        eng_ind = ds_eng['m_altimeter_status'].values == 0

    return t_eng[eng_ind], m_water_depth[eng_ind]


def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         n_std, surface_params, depth_params):
    """Create profile, cross-section and (for gliders) 4D plots per science variable.

    For every non-engineering reference designator found in ``url_list`` the
    matching datasets are opened and, for each science variable that is not a
    pressure variable, two sets of plots are saved: all data, and data after
    erroneous values, zeros and visually rejected time ranges are removed.
    Reference designators containing 'MOAS' additionally get a 3-D scatter of
    lon/lat/pressure when the file has ``lat`` and ``lon`` variables.

    :param url_list: list of URL strings; elements 1-4 of the '-'-separated
        second-to-last path segment form the reference designator.
    :param sDir: root directory for the saved plots.
    :param deployment_num: if not None, only datasets whose deployment number
        equals this value are plotted.
    :param start_time: optional start of the time slice; needs ``strftime``.
    :param end_time: optional end of the time slice; needs ``strftime``.
        The slice is applied only when both times are given.
    :param preferred_only: ``'yes'`` restricts the datasets to those listed
        by ``cf.get_preferred_stream_info``.
    :param n_std: accepted for interface compatibility. NOTE(review): the
        grouping call receives the percentile list in this slot (this is what
        the code has always passed), so the argument has no effect - confirm
        the intended behavior.
    :param surface_params: indexable; items 0-2 are the start, stop and step
        of the surface depth ranges, item 3 is the percentile for them.
    :param depth_params: indexable; items 0-2 are the start, stop and step of
        the deeper ranges (stop is included), item 3 is the percentile.
    :return: None
    """
    rd_list = []
    for uu in url_list:
        rd = _refdes_from_url(uu)
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            if _refdes_from_url(u) == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # entry is not a string, nothing to match against
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            # The context manager closes the file on every exit path,
            # including the `continue` statements below.
            with xr.open_dataset(fd, mask_and_scale=False) as ds_disk:
                ds = ds_disk.swap_dims({'obs': 'time'})

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    fd)
                array = subsite[0:2]
                sci_vars = cf.return_science_vars(stream)

                # for coastal gliders, get m_water_depth for bathymetry
                t_eng, m_water_depth = _glider_water_depth(
                    r, method, deployment, url_list)

                if deployment_num is not None:
                    # compare by value; `is not` on ints tests object identity
                    if _deployment_number(deployment) != deployment_num:
                        continue

                base_dir = os.path.join(sDir, array, subsite, refdes)
                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(start_time, end_time))
                        continue
                    stime = start_time.strftime('%Y-%m-%d')
                    etime = end_time.strftime('%Y-%m-%d')
                    ext = stime + 'to' + etime
                    dir_tail = (deployment, ext)
                else:
                    dir_tail = (deployment,)
                save_dir_profile = os.path.join(base_dir, 'profile_plots',
                                                *dir_tail)
                save_dir_xsection = os.path.join(base_dir, 'xsection_plots',
                                                 *dir_tail)
                save_dir_4d = os.path.join(base_dir, 'xsection_plots_4d',
                                           *dir_tail)

                tm = ds['time'].values
                try:
                    ds_lat = ds['lat'].values
                except KeyError:
                    ds_lat = None
                    print('No latitude variable in file')
                try:
                    ds_lon = ds['lon'].values
                except KeyError:
                    ds_lon = None
                    print('No longitude variable in file')

                # get pressure variable
                y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)

                for sv in sci_vars:
                    print(sv)
                    if 'pressure' in sv:
                        continue

                    z = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    # Check if the array is all NaNs
                    if sum(np.isnan(z)) == len(z):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z[z != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                        cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                    # get rid of 0.0 data
                    if 'CTD' in r:
                        ind = zpressure > 0.0
                    else:
                        ind = ndata > 0.0

                    lenzero = np.sum(~ind)
                    dtime = dtime[ind]
                    zpressure = zpressure[ind]
                    ndata = ndata[ind]
                    if ds_lat is not None and ds_lon is not None:
                        lat = lat[ind]
                        lon = lon[ind]
                    else:
                        lat = None
                        lon = None

                    t0 = pd.to_datetime(
                        dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(
                        dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title = ' '.join((deployment, refdes,
                                      method)) + '\n' + t0 + ' to ' + t1

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                        cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                    n_visual = len(ndata) - len(z_portal)
                    print(
                        'removed {} data points using visual inspection of data'
                        .format(n_visual))

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    range1 = list(
                        range(surface_params[0], surface_params[1],
                              surface_params[2]))
                    range2 = list(
                        range(depth_params[0],
                              depth_params[1] + depth_params[2],
                              depth_params[2]))
                    ranges = range1 + range2

                    groups, d_groups = gt.group_by_depth_range(
                        t_portal, y_portal, z_portal, columns, ranges)

                    #  get percentile analysis for printing on the profile plot
                    inpercentile = [surface_params[3]] * len(
                        range1) + [depth_params[3]] * len(range2)
                    # A separate copy of the percentile list goes in the
                    # n_std slot, held in a local so the function's n_std
                    # parameter is not overwritten.
                    group_n_std = list(inpercentile)
                    y_plt, n_med, n_min, n_max, n0_std, n1_std, l_arr, time_ex = reject_timestamps_in_groups(
                        groups, d_groups, group_n_std, inpercentile)

                    # names and labels shared by both sets of plots
                    sname = '-'.join((r, method, sv))
                    sv_label = sv + " (" + sv_units + ")"
                    press_label = press[0] + " (" + y_units[0] + ")"
                    removed_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                        '{} zeros'.format(lenfv, lennan, lenev, lengr,
                                          global_min, global_max, lenzero) +
                        '\nexcluded {} suspect data points when inspected visually'
                        .format(n_visual))

                    # Plot all data
                    if len(tm) > 0:
                        cf.create_dir(save_dir_profile)
                        cf.create_dir(save_dir_xsection)
                        sfileall = '_'.join(('all_data', sname))

                        # profile plot
                        fig, ax = pf.plot_profiles(z,
                                                   y,
                                                   tm,
                                                   press_label,
                                                   sv_label,
                                                   'Time',
                                                   stdev=None)
                        ax.set_title(title, fontsize=9)
                        fig.tight_layout()
                        pf.save_fig(save_dir_profile, sfileall)
                        plt.close(fig)

                        # xsection plot
                        fig, ax, bar = pf.plot_xsection(subsite,
                                                        tm,
                                                        y,
                                                        z,
                                                        sv_label,
                                                        press_label,
                                                        t_eng,
                                                        m_water_depth,
                                                        inpercentile=None,
                                                        stdev=None)
                        ax.set_title(title, fontsize=9)
                        fig.tight_layout()
                        pf.save_fig(save_dir_xsection, sfileall)
                        plt.close(fig)

                    # Plot cleaned-up data
                    if len(dtime) > 0:
                        sfile = '_'.join(('rm_erroneous_data', sname))

                        # profile plot with the per-group median and envelope
                        fig, ax = pf.plot_profiles(z_portal,
                                                   y_portal,
                                                   t_portal,
                                                   press_label,
                                                   sv_label,
                                                   'Time',
                                                   stdev=None)
                        ax.set_title(title, fontsize=9)
                        ax.plot(n_med, y_plt, '.k')
                        ax.fill_betweenx(y_plt,
                                         n0_std,
                                         n1_std,
                                         color='m',
                                         alpha=0.2)
                        leg_text = (
                            removed_text +
                            '\n(black) data median in {} dbar segments (break at {} dbar)'
                            .format([surface_params[2], depth_params[2]],
                                    depth_params[0]) +
                            '\n(magenta) upper and lower {} percentile envelope in {} dbar segments'
                            .format([surface_params[3], depth_params[3]],
                                    [surface_params[2], depth_params[2]]), )
                        ax.legend(leg_text,
                                  loc='upper center',
                                  bbox_to_anchor=(0.5, -0.17),
                                  fontsize=6)
                        fig.tight_layout()
                        pf.save_fig(save_dir_profile, sfile)
                        plt.close(fig)

                        # xsection plot of the non-erroneous data
                        fig, ax, bar = pf.plot_xsection(subsite,
                                                        t_portal,
                                                        y_portal,
                                                        z_portal,
                                                        sv_label,
                                                        press_label,
                                                        t_eng,
                                                        m_water_depth,
                                                        inpercentile=None,
                                                        stdev=None)
                        ax.set_title(title, fontsize=9)
                        leg_text = (removed_text, )
                        ax.legend(leg_text,
                                  loc='upper center',
                                  bbox_to_anchor=(0.5, -0.17),
                                  fontsize=6)
                        fig.tight_layout()
                        pf.save_fig(save_dir_xsection, sfile)
                        plt.close(fig)

                        # 4D plot for gliders only
                        if 'MOAS' in r and ds_lat is not None and ds_lon is not None:
                            cf.create_dir(save_dir_4d)

                            fig = plt.figure()
                            ax = fig.add_subplot(111, projection='3d')
                            sct = ax.scatter(lon_portal,
                                             lat_portal,
                                             y_portal,
                                             c=z_portal,
                                             s=2)
                            cbar = plt.colorbar(sct,
                                                label=sv_label,
                                                extend='both')
                            cbar.ax.tick_params(labelsize=8)
                            ax.invert_zaxis()
                            ax.view_init(25, 32)
                            ax.invert_xaxis()
                            ax.invert_yaxis()
                            ax.set_zlabel(press_label, fontsize=9)
                            ax.set_ylabel('Latitude', fontsize=9)
                            ax.set_xlabel('Longitude', fontsize=9)

                            ax.set_title(title, fontsize=9)
                            pf.save_fig(save_dir_4d, sfile)
                            plt.close(fig)
Ejemplo n.º 5
0
def main(sDir, url_list, start_time, end_time, preferred_only):
    """Create timeseries plots for every PRESF reference designator in ``url_list``.

    For each matching dataset and each science variable other than ``'id'``:

    * 1-D variables are plotted twice with ``pf.plot_timeseries`` after fill
      values are dropped: once as-is and once with ``stdev=5``.
    * Variables with more than one dimension are plotted with
      ``pf.plot_presf_2d`` after fill values are set to NaN, and a second
      time with values outside the global ranges removed (only when global
      ranges exist and at least one value was removed).

    Plots are saved under
    ``<sDir>/<array>/<subsite>/<refdes>/timeseries_plots/<deployment>``.

    :param sDir: root directory for the saved plots.
    :param url_list: list of URL strings; elements 1-4 of the '-'-separated
        second-to-last path segment form the reference designator.
    :param start_time: optional start of the time slice.
    :param end_time: optional end of the time slice. The slice is applied
        only when both times are given.
    :param preferred_only: ``'yes'`` restricts the datasets to those listed
        by ``cf.get_preferred_stream_info``.
    :return: None
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'PRESF' in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                # filter out collocated data files
                datasets.extend(ud for ud in cf.get_nc_urls([u])
                                if 'PRESF' in ud.split('/')[-1])

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # entry is not a string, nothing to match against
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        # drop duplicate URLs (np.unique also sorts them)
        fdatasets = np.unique(fdatasets).tolist()
        for fd in fdatasets:
            # The context manager closes the file on every exit path,
            # including the `continue` statements below.
            with xr.open_dataset(fd, mask_and_scale=False) as ds_disk:
                ds = ds_disk.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    fd)
                sci_vars = cf.return_science_vars(stream)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                # file name without its last '_'-separated element
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))
                time_title = title + '\n' + t0 + ' - ' + t1

                for var in sci_vars:
                    print(var)
                    if var == 'id':
                        continue

                    y = ds[var]
                    fv = y._FillValue

                    if len(y.dims) == 1:
                        # Check if the array is all NaNs
                        if np.isnan(y.values).all():
                            print('Array of all NaNs - skipping plot.')

                        # Check if the array is all fill values
                        elif len(y[y != fv]) == 0:
                            print('Array of all fill values - skipping plot.')

                        else:
                            # reject fill values
                            ind = y.values != fv
                            t = tm[ind]
                            y = y[ind]

                            # Plot all data
                            fig, ax = pf.plot_timeseries(t,
                                                         y,
                                                         y.name,
                                                         stdev=None)
                            ax.set_title(time_title, fontsize=9)
                            sfile = '-'.join((filename, y.name, t0[:10]))
                            pf.save_fig(save_dir, sfile)

                            # Plot data with outliers removed
                            fig, ax = pf.plot_timeseries(t, y, y.name, stdev=5)
                            ax.set_title(time_title, fontsize=9)
                            sfile = '-'.join(
                                (filename, y.name, t0[:10])) + '_rmoutliers'
                            pf.save_fig(save_dir, sfile)
                        continue

                    # variable with more than one dimension
                    v = y.values.T
                    n_nan = np.sum(np.isnan(v))

                    # convert fill values to nans; integer arrays cannot hold
                    # NaN, so fall back to a float copy
                    try:
                        v[v == fv] = np.nan
                    except ValueError:
                        v = v.astype(float)
                        v[v == fv] = np.nan
                    n_fv = np.sum(np.isnan(v)) - n_nan

                    # plot before global ranges are removed
                    fig, ax = pf.plot_presf_2d(tm, v, y.name, y.units)
                    ax.set_title(time_title, fontsize=9)
                    sfile = '-'.join((filename, var, t0[:10]))
                    pf.save_fig(save_dir, sfile)

                    # reject data outside of global ranges
                    g_min, g_max = cf.get_global_ranges(r, var)
                    if g_min is None or g_max is None:
                        continue
                    v[v < g_min] = np.nan
                    v[v > g_max] = np.nan
                    n_grange = np.sum(np.isnan(v)) - n_fv - n_nan

                    # Re-plot only if the global-range filter removed
                    # something; don't plot if the array is all nans
                    if n_grange <= 0 or np.isnan(v).all():
                        continue

                    # plot after global ranges are removed
                    fig, ax = pf.plot_presf_2d(tm, v, y.name, y.units)
                    title2 = 'removed: {} global ranges [{}, {}]'.format(
                        n_grange, g_min, g_max)
                    ax.set_title((time_title + '\n' + title2), fontsize=9)
                    sfile = '-'.join((filename, var, t0[:10], 'rmgr'))
                    pf.save_fig(save_dir, sfile)
Ejemplo n.º 6
0
def plot_map(save_directory, savefile, plt_title, londata, latdata, tm, array, bfiles, plt_type=None, add_box=None):
    """
    Scatter positions on a PlateCarree map colored by tm, overlay bathymetry contours where a grid
    file is configured for the array, mark the array's standard locations and save the figure.

    save_directory: directory passed to pf.save_fig
    savefile: file name passed to pf.save_fig
    plt_title: plot title
    londata: longitudes of the points to plot
    latdata: latitudes of the points to plot
    tm: values used to color the points (colorbar tick labels are formatted as %Y-%m-%d dates)
    array: array code. CE, CP, GA, GI, GP and GS have a bathymetry grid; all of those except CE
        also have a fixed map extent
    bfiles: directory containing the bathymetry .grd files
    plt_type: 'glider_track_drift' fits the extent to the data and skips bathymetry
        (ignored for CE and CP, which always use their own settings)
    add_box: 'yes' draws the glider box for CE instead of fitting the extent to the data

    Raises ValueError when neither a fixed extent nor a data-driven extent applies to the array.
    """
    shelf_contours = [-3000, -2500, -2000, -1500, -1000, -50, 0]
    # array -> (bathymetry grid file name, contour levels)
    bathymetry = {
        'CE': ('GMRTv3_6_20190510topo_CE.grd', shelf_contours),
        'CP': ('GMRTv3_6_20190510topo_CP.grd', shelf_contours),
        'GA': ('GMRTv3_6_20190510topo_GA.grd', [-5500, -5400, -5300, -5200, -5100, -5000]),
        'GI': ('GMRTv3_6_20190510topo_GI.grd', [-3500, -3250, -3000, -2750, -2500, -2250, -2000]),
        'GP': ('GMRTv3_6_20190513topo_GP.grd', [-4500, -4250, -4000, -3750, -3500, -3250, -3000]),
        'GS': ('GMRTv3_6_20190513topo_GS.grd', [-5500, -5000, -4500, -4000, -3500, -3000, -2500, -2000]),
    }
    # array -> [lonmin, lonmax, latmin, latmax]
    fixed_extents = {
        'CP': [-72.5, -69.5, 38.5, 42],
        'GA': [-43.5, -41.5, -43.5, -42],
        'GI': [-40.1, -39, 59.2, 60.3],
        'GP': [-145.1, -143.95, 49.7, 50.6],
        'GS': [-89.95, -88.65, -54.8, -53.7],
    }

    fit_to_data = plt_type == 'glider_track_drift' and array not in ('CE', 'CP')
    if array != 'CE' and not fit_to_data and array not in fixed_extents:
        # fail before any figure is created, with a message naming the supported options
        raise ValueError(
            "No map extent defined for array '{}': use one of {} or plt_type='glider_track_drift'".format(
                array, sorted(bathymetry)))

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection=ccrs.PlateCarree()))
    plt.subplots_adjust(right=0.85)
    states = cfeature.NaturalEarthFeature(category="cultural", scale="10m",
                                          facecolor="none",
                                          name="admin_1_states_provinces_shp")
    ax.add_feature(states, linewidth=.5, edgecolor="black", facecolor='grey')
    ax.add_feature(cfeature.RIVERS, zorder=10, facecolor='white')

    # label the left and bottom edges only, without drawing the grid lines themselves
    # NOTE(review): xlabels_top/ylabels_right are the older cartopy attribute names - confirm
    # against the installed cartopy version
    gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True)
    gl.xlabels_top = False
    gl.ylabels_right = False
    gl.xlines = False
    gl.ylines = False
    ax.coastlines('10m', linewidth=1)

    array_loc = cf.return_array_subsites_standard_loc(array)

    ax.set_title(plt_title, fontsize=10)

    def data_extent():
        # extent that covers both the array's standard locations and the plotted data
        lonmin, lonmax = define_extent(array_loc.lon, londata, 'lon')
        latmin, latmax = define_extent(array_loc.lat, latdata, 'lat')
        return [lonmin, lonmax, latmin, latmax]

    if array == 'CE':
        if add_box == 'yes':
            ax = plot_glider_box(ax, array)
        else:
            ax.set_extent(data_extent(), crs=ccrs.PlateCarree())
    else:
        lims = data_extent() if fit_to_data else fixed_extents[array]
        ax.set_extent(lims, crs=ccrs.PlateCarree())
        ax = plot_glider_box(ax, array)

    if not fit_to_data:
        grid_name, bathy_contours = bathymetry[array]
        # the context manager closes the grid file once the contours have been drawn
        with xr.open_dataset(os.path.join(bfiles, grid_name)) as grid_file:
            gf_lon = grid_file['lon']
            gf_lat = grid_file['lat']
            # only contour the part of the grid that falls inside the current axes limits
            lon_ind = np.logical_and(gf_lon > ax.get_xlim()[0], gf_lon < ax.get_xlim()[1])
            lat_ind = np.logical_and(gf_lat > ax.get_ylim()[0], gf_lat < ax.get_ylim()[1])
            bathy = grid_file['altitude'][lat_ind, lon_ind].values
            CS = ax.contour(gf_lon[lon_ind], gf_lat[lat_ind], bathy, bathy_contours, colors='gray',
                            linewidths=0.5, alpha=0.5)
            ax.clabel(CS, inline=1, fontsize=8, fmt='%.0f')

    sct = plt.scatter(londata, latdata, c=tm, marker='.', s=2, cmap='rainbow', transform=ccrs.Geodetic())
    plt.scatter(array_loc.lon, array_loc.lat, s=45, marker='x', color='k')

    # put the colorbar in its own plain axes to the right of the map
    divider = make_axes_locatable(ax)
    cax = divider.new_horizontal(size='5%', pad=0.1, axes_class=plt.Axes)
    fig.add_axes(cax)
    cbar = plt.colorbar(sct, cax=cax, label='Time')
    cbar.ax.set_yticklabels(pd.to_datetime(cbar.ax.get_yticks()).strftime(date_format='%Y-%m-%d'))

    pf.save_fig(save_directory, savefile)
# Example no. 7
# 0
def main(files, out):
    """
    Plot the northward and eastward seawater velocity from ADCP data and save the figure.

    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    """
    # Exact match on the extension: a substring test such as (ext in '.nc') is also true for an
    # empty extension, which would send an extension-less text file of links straight to xarray.
    ext = os.path.splitext(files)[1]
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    # NOTE(review): the returned dictionary is not used below; the call is kept in case the
    # loader has side effects - confirm and remove
    pf.load_variable_dict(var='eng')  # load engineering variables

    # the engine that xarray uses can be changed as specified here
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    with xr.open_mfdataset(list_files, engine='netcdf4') as ds_disk:

        # change dimensions from 'obs' to 'time'
        ds_disk = ds_disk.swap_dims({'obs': 'time'})
        title = mk_str(ds_disk.attrs, 't')
        sname = mk_str(ds_disk.attrs, 's') + 'ADCP'
        save_dir = os.path.join(out, ds_disk.subsite, ds_disk.node,
                                ds_disk.stream, 'pcolor')
        cf.create_dir(save_dir)

        def plot_input(variable):
            # data is transposed before being handed to the plotting function
            return dict(data=variable.data.T,
                        info=dict(label=variable.long_name, units=variable.units))

        time_axis = dict(data=ds_disk['time'].data,
                         info=dict(label=ds_disk['time'].standard_name,
                                   units='GMT'))
        bins = plot_input(ds_disk['bin_depths'])
        north = plot_input(ds_disk['northward_seawater_velocity'])
        east = plot_input(ds_disk['eastward_seawater_velocity'])

        pf.adcp(time_axis, bins, north, east, title)
        pf.resize(width=12, height=8.5)  # Resize figure
        pf.save_fig(save_dir, sname, res=250)  # Save figure
        plt.close('all')
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         n_std, inpercentile, zcell_size, zdbar):
    """
    Create profile and cross-section plots of every science variable, one set per deployment, for
    each reference designator found in url_list (designators containing ENG or ADCP are skipped).

    url_list: catalog urls; the second-to-last path element is split on '-' to build the reference
        designator and the method-stream
    sDir: root directory under which the plot directories are created
    deployment_num: if not None, only this deployment number is plotted
    start_time, end_time: if both are not None, data are sliced to this range and the range is
        appended to the save path (both are formatted with strftime)
    preferred_only: 'yes' keeps only datasets matching the preferred stream info of each deployment
    n_std, inpercentile, zcell_size: not used by the part of this function present in the file
    zdbar: if truthy, only data with 0 < pressure < zdbar are kept
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        deployments = []
        for url in url_list:
            splitter = url.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            catalog_rms = '-'.join((r, splitter[-2], splitter[-1]))
            if rd_check == r:
                for u in cf.get_nc_urls([url]):  # filter out collocated data files
                    if catalog_rms == u.split('/')[-1].split('_20')[0][15:]:
                        datasets.append(u)
                        deployments.append(
                            int(u.split('/')[-1].split('_')[0][-4:]))
        deployments = np.unique(deployments).tolist()

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # entry is not a string, so there is no stream to match in this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        array = r[0:2]
        subsite = r.split('-')[0]

        for dep in deployments:
            # compare by value: identity ("is not") is not a reliable test for equal integers
            if deployment_num is not None and dep != deployment_num:
                print('\nskipping deployment {}'.format(dep))
                continue

            dep_name = 'deployment%04d' % dep
            rdatasets = sorted(s for s in fdatasets_sel if dep_name in s)
            if not rdatasets:
                continue

            # plots for a requested time range go in their own sub-directory
            if start_time is not None and end_time is not None:
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                dir_tail = (dep_name, stime + 'to' + etime)
            else:
                dir_tail = (dep_name,)
            save_dir_profile = os.path.join(
                sDir, array, subsite, r, 'profile_plots', *dir_tail)
            save_dir_xsection = os.path.join(
                sDir, array, subsite, r, 'xsection_plots', *dir_tail)

            sci_vars_dict = {}
            for i, dataset in enumerate(rdatasets):
                # the context manager closes each file once its values have been copied out
                with xr.open_dataset(dataset, mask_and_scale=False) as ds:
                    ds = ds.swap_dims({'obs': 'time'})
                    print('\nAppending data from {}: file {} of {}'.format(
                        dep_name, i + 1, len(rdatasets)))

                    if start_time is not None and end_time is not None:
                        ds = ds.sel(time=slice(start_time, end_time))
                        if len(ds['time'].values) == 0:
                            print(
                                'No data to plot for specified time range: ({} to {})'
                                .format(start_time, end_time))
                            continue

                    if not sci_vars_dict:
                        _, _, refdes, method, stream, deployment = cf.nc_attributes(
                            rdatasets[0])
                        sci_vars = cf.return_science_vars(stream)
                        if 'CTDPF' not in r:
                            sci_vars.append('int_ctd_pressure')
                        sci_vars.append('time')
                        sci_vars = list(np.unique(sci_vars))

                        # initialize the dictionary
                        for sci_var in sci_vars:
                            if sci_var == 'time':
                                empty = np.array([], dtype=np.datetime64)
                            else:
                                empty = np.array([])
                            sci_vars_dict[sci_var] = dict(values=empty,
                                                          units=[],
                                                          fv=[])

                    # append data for the deployment into the dictionary
                    for s_v, entry in sci_vars_dict.items():
                        vv = ds[s_v]
                        try:
                            if vv.units not in entry['units']:
                                entry['units'].append(vv.units)
                        except AttributeError:
                            print('')
                        # NOTE(review): fill values are only turned into nans for the file that
                        # first introduces a given fill value - confirm this is intended
                        try:
                            if vv._FillValue not in entry['fv']:
                                entry['fv'].append(vv._FillValue)
                                vv_data = vv.values
                                try:
                                    # turn fill values to nans
                                    vv_data[vv_data == vv._FillValue] = np.nan
                                except ValueError:
                                    print('')
                        except AttributeError:
                            print('')

                        if len(vv.dims) > 1:
                            print('Skipping plot: variable has >1 dimension')
                        else:
                            entry['values'] = np.append(entry['values'],
                                                        vv.values)

            if not sci_vars_dict:
                # every file was empty for the requested time range, so there is nothing to plot
                continue

            # plot after appending all data into one file
            time1 = sci_vars_dict['time']['values']
            # NOTE(review): np.empty leaves these latitude/longitude placeholders uninitialized;
            # they are only passed through the rejection helpers below
            ds_lat1 = np.empty(np.shape(time1))
            ds_lon1 = np.empty(np.shape(time1))

            # define pressure variable
            if 'seawater_pressure' in sci_vars_dict:
                pname = 'seawater_pressure'
            else:
                pname = 'int_ctd_pressure'
            press = sci_vars_dict[pname]
            y1 = press['values']
            y_units = press['units'][0] if press['units'] else ''

            for sv in sci_vars_dict.keys():
                print('')
                print(sv)
                if sv in ['seawater_pressure', 'int_ctd_pressure', 'time']:
                    continue

                z1 = sci_vars_dict[sv]['values']
                fv = sci_vars_dict[sv]['fv'][0]
                units = sci_vars_dict[sv]['units']
                # same fallback as the pressure units when the variable has no units attribute
                sv_units = units[0] if units else ''

                # Check if the array is all NaNs
                if np.isnan(z1).all():
                    print('Array of all NaNs - skipping plot.')
                    continue

                # Check if the array is all fill values
                if len(z1[z1 != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue

                # remove unreasonable pressure data (e.g. for surface piercing profilers)
                if zdbar:
                    po_ind = (0 < y1) & (y1 < zdbar)
                    tm = time1[po_ind]
                    y = y1[po_ind]
                    z = z1[po_ind]
                    ds_lat = ds_lat1[po_ind]
                    ds_lon = ds_lon1[po_ind]
                else:
                    tm = time1
                    y = y1
                    z = z1
                    ds_lat = ds_lat1
                    ds_lon = ds_lon1

                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                    cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                # get rid of 0.0 data
                if 'CTD' in r:
                    ind = zpressure > 0.0
                else:
                    ind = ndata > 0.0

                dtime = dtime[ind]
                zpressure = zpressure[ind]
                ndata = ndata[ind]
                lat = lat[ind]
                lon = lon[ind]

                if len(dtime) == 0:
                    # nothing survived the rejection steps, so there is nothing to plot for
                    # this variable
                    print('No data remaining after rejecting erroneous data - skipping plot.')
                    continue

                # reject time range from data portal file export
                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                print('removed {} data points using visual inspection of data'.
                      format(len(ndata) - len(z_portal)))

                if len(t_portal) == 0:
                    # min()/max() below would raise on an empty array
                    print('No data remaining after rejecting time ranges - skipping plot.')
                    continue

                # Plot all data
                cf.create_dir(save_dir_profile)
                cf.create_dir(save_dir_xsection)
                sname = '-'.join((r, method, sv))
                sfileall = '_'.join(
                    (sname,
                     pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
                tm0 = pd.to_datetime(
                    t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S')
                tm1 = pd.to_datetime(
                    t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join(
                    (deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                if 'SPKIR' in r:
                    title = title + '\nWavelength = 510 nm'

                # profile plot
                xlabel = sv + " (" + sv_units + ")"
                ylabel = pname + " (" + y_units + ")"
                clabel = 'Time'

                fig, ax = pf.plot_profiles(z_portal,
                                           y_portal,
                                           t_portal,
                                           ylabel,
                                           xlabel,
                                           clabel,
                                           stdev=None)

                ax.set_title(title, fontsize=9)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sfileall)

                # xsection plot
                clabel = sv + " (" + sv_units + ")"
                ylabel = pname + " (" + y_units + ")"

                fig, ax, bar = pf.plot_xsection(subsite,
                                                t_portal,
                                                y_portal,
                                                z_portal,
                                                clabel,
                                                ylabel,
                                                t_eng=None,
                                                m_water_depth=None,
                                                inpercentile=None,
                                                stdev=None)

                # a falsy fig means no cross-section figure was returned
                if fig:
                    ax.set_title(title, fontsize=9)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfileall)
                # NOTE(review): the function appears to be truncated at this point in the file -
                # confirm against the upstream source
# Example no. 9
# 0
def main(url_list, sDir, plot_type, start_time, end_time, deployment_num):
    """
    Plot the track (latitude vs. longitude, colored by time) of each platform in url_list, with
    the Pioneer glider sampling area and the array's standard locations, and save the figure.

    url_list: catalog urls; the second-to-last path element is split on '-' to build the reference
        designator and the method-stream
    sDir: root directory under which the plot directory is created
    plot_type: name of the sub-directory the plot is saved in
    start_time, end_time: if both are not None, each dataset is sliced to this range
    deployment_num: if not None, only datasets from this deployment number are included
    """
    for u in url_list:
        elements = u.split('/')[-2].split('-')
        r = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = u.split(r + '-')[1].split('/')[0]
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        save_dir = os.path.join(sDir, array, subsite, r, plot_type)
        cf.create_dir(save_dir)
        sname = '-'.join((r, ms, 'track'))

        print('Appending....')
        tracks = []
        deployments = []
        end_times = []
        deployment_info_loaded = False
        for ii, d in enumerate(datasets_sel):
            print('\nDataset {} of {}: {}'.format(ii + 1, len(datasets_sel),
                                                  d.split('/')[-1]))
            # the context manager closes each file once its values have been copied out
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(start_time, end_time))
                        continue

                _, _, _, _, _, deployment = cf.nc_attributes(d)

                if deployment_num is not None:
                    # Take the trailing four digits (as done for row['deployment'] below) and
                    # compare by value. Splitting on '0' breaks for numbers that contain a zero,
                    # and identity ("is not") is not a reliable test for equal integers.
                    # NOTE(review): assumes deployment ends in a 4-digit number - confirm against
                    # cf.nc_attributes
                    dataset_deployment = int(deployment[-4:])
                    if dataset_deployment != int(deployment_num):
                        print('\nskipping deployment {}'.format(
                            dataset_deployment))
                        continue

                # get end times of deployments (depends only on r, so look it up once, and only
                # when at least one dataset is actually included)
                if not deployment_info_loaded:
                    ps_df, n_streams = cf.get_preferred_stream_info(r)
                    dr_data = cf.refdes_datareview_json(r)

                    for index, row in ps_df.iterrows():
                        deploy_number = int(row['deployment'][-4:])
                        deploy_info = cf.get_deployment_information(
                            dr_data, deploy_number)
                        if deploy_number not in deployments:
                            deployments.append(deploy_number)
                        stop_date = pd.to_datetime(deploy_info['stop_date'])
                        if stop_date not in end_times:
                            end_times.append(stop_date)
                    deployment_info_loaded = True

                tracks.append(
                    pd.DataFrame({'lat': ds['lat'].values,
                                  'lon': ds['lon'].values},
                                 columns=['lat', 'lon'],
                                 index=ds['time'].values))

        if not tracks:
            # pd.concat raises on an empty list and there is nothing to draw anyway
            print('No data to plot for {}'.format(r))
            continue

        # DataFrame.append was removed in pandas 2.0; concatenate the collected frames instead
        sh = pd.concat(tracks)

        xD = sh.lon.values
        yD = sh.lat.values
        tD = sh.index.values

        clabel = 'Time'
        ylabel = 'Latitude'
        xlabel = 'Longitude'

        fig, ax = pf.plot_profiles(xD,
                                   yD,
                                   tD,
                                   ylabel,
                                   xlabel,
                                   clabel,
                                   end_times,
                                   deployments,
                                   stdev=None)
        ax.invert_yaxis()
        ax.set_title('Glider Track - ' + r + '\n' + 'x: platform location',
                     fontsize=9)
        ax.set_xlim(-71.75, -69.75)
        ax.set_ylim(38.75, 40.75)

        # add Pioneer glider sampling area
        ax.add_patch(
            Rectangle((-71.5, 39.0),
                      1.58,
                      1.67,
                      linewidth=3,
                      edgecolor='b',
                      facecolor='none'))
        ax.text(-71,
                40.6,
                'Pioneer Glider Sampling Area',
                color='blue',
                fontsize=8)

        array_loc = cf.return_array_subsites_standard_loc(array)

        ax.scatter(array_loc.lon,
                   array_loc.lat,
                   s=40,
                   marker='x',
                   color='k',
                   alpha=0.3)

        pf.save_fig(save_dir, sname)
def main(files, out, time_break, depth, start, end, interactive):
    """
    Plot depth cross-sections for the science variables of one or more datasets.

    Every dataset is opened with xarray, its 'obs' dimension is swapped for
    'time', variables whose names match the `misc` keyword list are dropped,
    and each remaining variable is passed to pf.depth_glider_cross_section as
    z, with time as x and the `depth` variable as y.

    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    time_break: name of a dataset variable; one set of plots is produced per
        unique value of that variable. If None, the data between `start` and
        `end` is plotted instead.
    depth: name of the dataset variable used for the y axis
    start, end: bounds given to ds.sel(time=slice(start, end)); only used
        when time_break is None
    interactive: when equal to True each figure is shown with a pick-event
        handler; otherwise it is resized, saved and closed
    """
    _, ext = os.path.splitext(files)
    # Compare against the exact extensions. A substring test such as
    # `ext in '.nc'` is also true for an empty extension, which would treat an
    # extension-less text file of links as a NetCDF url.
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    # NOTE(review): stream_vars (and save_pre below) are never read in this
    # function; the calls are kept because their helpers are not visible
    # here - confirm they have no required side effects before removing.
    stream_vars = pf.load_variable_dict(
        var='eng')  # load engineering variables

    # Timestamp layout used in saved file names. Seconds are written as a
    # literal "00": '%0' is not a strftime directive and what it produces
    # depends on the platform's C library.
    time_fmt = '%Y-%m-%dT%H%M00'

    # Name fragments identifying variables that should not be plotted.
    misc = [
        'quality', 'string', 'timestamp', 'deployment', 'id',
        'provenance', 'qc', 'time', 'mission', 'obs', 'volt', 'ref',
        'sig', 'amp', 'rph', 'calphase', 'phase', 'therm', 'light'
    ]
    reg_ex = re.compile('|'.join(misc))

    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds:
            # change dimensions from 'obs' to 'time'
            ds = ds.swap_dims({'obs': 'time'})
            ds_variables = ds.data_vars.keys()  # List of dataset variables
            title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
            save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
            platform = ds.subsite
            node = ds.node
            sensor = ds.sensor
            save_dir = os.path.join(
                out, ds.subsite, ds.subsite + '-' + ds.node + '-' + ds.sensor,
                ds.stream, 'xsection_depth_profiles')
            cf.create_dir(save_dir)

            #  keep variables that are not in the regular expression
            sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

            if time_break is not None:
                times = np.unique(ds[time_break])

                for t in times:
                    # boolean mask selecting the records of this group
                    time_ind = t == ds[time_break].data
                    for var in sci_vars:
                        x = dict(data=ds['time'].data[time_ind],
                                 info=dict(label='Time', units='GMT'))
                        t0 = pd.to_datetime(x['data'].min()).strftime(time_fmt)
                        t1 = pd.to_datetime(x['data'].max()).strftime(time_fmt)
                        try:
                            sci = ds[var]
                            print(var)
                        except UnicodeEncodeError:  # some comments have latex characters
                            ds[var].attrs.pop(
                                'comment')  # remove from the attributes
                            sci = ds[var]  # or else the variable won't load

                        y = dict(data=ds[depth].data[time_ind],
                                 info=dict(label='Pressure',
                                           units='dbar',
                                           var=var,
                                           platform=platform,
                                           node=node,
                                           sensor=sensor))

                        try:
                            z_lab = sci.long_name
                        except AttributeError:
                            z_lab = sci.standard_name
                        z = dict(data=sci.data[time_ind],
                                 info=dict(label=z_lab,
                                           units=str(sci.units),
                                           var=var,
                                           platform=platform,
                                           node=node,
                                           sensor=sensor))

                        title = title_pre + var

                        # NOTE(review): unlike the time-range branch below,
                        # `interactive` is not forwarded to the plotting call
                        # here - confirm whether that is intentional.
                        fig, ax = pf.depth_glider_cross_section(x,
                                                                y,
                                                                z,
                                                                title=title)

                        if interactive == True:
                            # plt.show() blocks, so x/y/z still refer to this
                            # variable's data when the callback fires
                            fig.canvas.mpl_connect(
                                'pick_event', lambda event: pf.onpick3(
                                    event, x['data'], y['data'], z['data']))
                            plt.show()

                        else:
                            pf.resize(width=12, height=8.5)  # Resize figure
                            save_name = '{}-{}-{}_{}_{}-{}'.format(
                                platform, node, sensor, var, t0, t1)
                            pf.save_fig(save_dir, save_name,
                                        res=150)  # Save figure
                            plt.close('all')

            else:
                ds = ds.sel(time=slice(start, end))

                for var in sci_vars:
                    x = dict(data=ds['time'].data[:],
                             info=dict(label='Time', units='GMT'))
                    t0 = pd.to_datetime(x['data'].min()).strftime(time_fmt)
                    t1 = pd.to_datetime(x['data'].max()).strftime(time_fmt)
                    try:
                        sci = ds[var]
                        print(var)
                    except UnicodeEncodeError:  # some comments have latex characters
                        ds[var].attrs.pop(
                            'comment')  # remove from the attributes
                        sci = ds[var]  # or else the variable won't load

                    y = dict(data=ds[depth].data[:],
                             info=dict(label='Pressure',
                                       units='dbar',
                                       var=var,
                                       platform=platform,
                                       node=node,
                                       sensor=sensor))

                    try:
                        z_lab = sci.long_name
                    except AttributeError:
                        z_lab = sci.standard_name
                    z = dict(data=sci.data[:],
                             info=dict(label=z_lab,
                                       units=sci.units,
                                       var=var,
                                       platform=platform,
                                       node=node,
                                       sensor=sensor))

                    title = title_pre + var

                    fig, ax = pf.depth_glider_cross_section(
                        x, y, z, title=title, interactive=interactive)

                    if interactive == True:
                        # plt.show() blocks, so x/y/z still refer to this
                        # variable's data when the callback fires
                        fig.canvas.mpl_connect(
                            'pick_event', lambda event: pf.onpick3(
                                event, x['data'], y['data'], z['data']))
                        plt.show()

                    else:
                        pf.resize(width=12, height=8.5)  # Resize figure
                        save_name = '{}-{}-{}_{}_{}-{}'.format(
                            platform, node, sensor, var, t0, t1)
                        pf.save_fig(save_dir, save_name,
                                    res=150)  # Save figure
                        plt.close('all')
def main(sDir, ncdir, start_time, end_time):
    """
    Plot timeseries of the variables in every NetCDF file found under ncdir.

    For each '.nc' file (files whose name contains '_blank' are skipped) and
    each one-dimensional variable that is not entirely NaN/fill values, two
    figures are saved: one plotted with stdev=None and one plotted with
    stdev=5 (file name suffix '_rmoutliers').

    sDir: root directory for the plots; figures are saved under
        <sDir>/<array>/<subsite>/<refdes>/timeseries_plots/<deployment>
    ncdir: directory searched (recursively) for '.nc' files. The
        second-to-last '/'-separated element of this string is used as the
        label printed to the console.
    start_time, end_time: when both are not None each dataset is limited to
        time=slice(start_time, end_time); files with no data in that range
        are skipped
    """
    rd_list = [ncdir.split('/')[-2]]
    skip_vars = ('id', 'record_type', 'unique_id')

    for r in rd_list:
        print('\n{}'.format(r))
        # Keep the full path of each file: os.walk descends into
        # sub-directories, so joining a bare file name onto ncdir would give
        # a wrong path for anything found below the top level.
        datasets = [
            os.path.join(root, f)
            for root, _dirs, files in os.walk(ncdir)
            for f in files
            if f.endswith('.nc')
        ]
        for fd in datasets:
            if '_blank' in os.path.basename(fd):
                continue

            # the context manager closes the file once its plots are saved
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                ds_vars = list(ds.data_vars.keys()) + [
                    x for x in ds.coords.keys() if 'pressure' in x
                ]  # get pressure variable from coordinates

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    fd)
                # Both operands must be membership tests; a bare string
                # literal here would always be truthy and make the raw
                # variable branch unreachable.
                if 'NUTNR' in refdes or 'VEL3D' in refdes:
                    plot_vars = cf.return_science_vars(stream)
                else:
                    plot_vars = cf.return_raw_vars(ds_vars)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))
                plot_title = title + '\n' + t0 + ' - ' + t1

                for var in plot_vars:
                    print(var)
                    if var in skip_vars:
                        continue

                    y = ds[var]
                    try:
                        fv = y._FillValue
                    except AttributeError:
                        fv = np.nan

                    # only one-dimensional variables are plotted
                    if len(y.dims) != 1:
                        continue

                    y[y == fv] = np.nan  # turn fill values to nans
                    if np.isnan(y.values).all():
                        print(
                            'Array of all NaNs and/or fill values - skipping plot.'
                        )
                        continue

                    # reject fill values
                    ind = y.values != fv
                    t = tm[ind]
                    y = y[ind]

                    # Plot all data
                    fig, ax = pf.plot_timeseries(t, y, y.name, stdev=None)
                    ax.set_title(plot_title, fontsize=9)
                    sfile = '-'.join((filename, y.name, t0[:10]))
                    pf.save_fig(save_dir, sfile)

                    # Plot data with outliers removed
                    fig, ax = pf.plot_timeseries(t, y, y.name, stdev=5)
                    ax.set_title(plot_title, fontsize=9)
                    sfile = '-'.join((filename, y.name,
                                      t0[:10])) + '_rmoutliers'
                    pf.save_fig(save_dir, sfile)
# Example no. 12
# 0
def _draw_deployment_ends(ax, end_times, color):
    """Draw a dashed vertical line on ax at every time in end_times."""
    for etime in end_times:
        ax.axvline(x=etime, color=color, linestyle='--', linewidth=.6)


def main(sDir, url_list, start_time, end_time):
    """
    Plot timeseries of science data for every reference designator in url_list.

    The reference designators are taken from the second-to-last '/'-separated
    element of each url (its '-'-separated fields 1 to 4). For each one the
    datasets matching the preferred stream of every deployment are collected,
    their science data is appended with cd.append_science_data, and one or
    more figures per science variable are saved.

    sDir: root directory for the plots; figures are saved under
        <sDir>/<array>/<subsite>/<refdes>/timeseries_plots_preferred_all
    url_list: list of urls handed to cf.get_nc_urls
    start_time, end_time: forwarded to cd.append_science_data. When
        start_time is a datetime, deployment end lines on the 1D plots are
        limited to deployments present in the plotted variable.
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.append(cf.get_nc_urls([u]))
        datasets = list(itertools.chain(*datasets))

        # get the preferred stream information (used for filtering the
        # datasets, appending the data and finding the deployment end times)
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        fdatasets = []
        for index, row in ps_df.iterrows():
            for ii in range(n_streams):
                try:
                    rms = '-'.join((r, row[ii]))
                except TypeError:
                    # row[ii] is not a string - presumably a missing entry
                    continue
                for dd in datasets:
                    spl = dd.split('/')[-2].split('-')
                    catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                    fdeploy = dd.split('/')[-1].split('_')[0]
                    if rms == catalog_rms and fdeploy == row['deployment']:
                        fdatasets.append(dd)

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        # get science variable long names from the Data Review Database
        if 'SPKIR' in r or 'PRESF' in r:  # only get the main science variable for SPKIR
            stream_vars = cd.sci_var_long_names(r)
        else:
            stream_vars = var_long_names(r)

        # check if the science variable long names are the same for each stream and initialize empty arrays
        sci_vars_dict = cd.sci_var_long_names_check(stream_vars)

        # build dictionary of science data from the preferred dataset for each deployment
        print('\nAppending data from files')
        et = []
        sci_vars_dict, __, __ = cd.append_science_data(ps_df, n_streams, r, fdatasets_sel, sci_vars_dict, et, start_time, end_time)

        # get end times of deployments, both in row order and keyed by
        # deployment number so a line can be looked up for a given deployment
        # even when the numbers are not consecutive from 1
        dr_data = cf.refdes_datareview_json(r)
        dend_times = []
        dend_by_deploy = {}
        for index, row in ps_df.iterrows():
            deploy_num = int(row['deployment'][-4:])
            deploy_info = get_deployment_information(dr_data, deploy_num)
            stop_time = pd.to_datetime(deploy_info['stop_date'])
            dend_times.append(stop_time)
            dend_by_deploy[deploy_num] = stop_time

        subsite = r.split('-')[0]
        array = subsite[0:2]
        save_dir = os.path.join(sDir, array, subsite, r, 'timeseries_plots_preferred_all')
        cf.create_dir(save_dir)

        print('\nPlotting data')
        for m, n in sci_vars_dict.items():
            for sv, vinfo in n['vars'].items():
                print(sv)
                if 'SPKIR' in r:
                    fv_lst = np.unique(vinfo['fv']).tolist()
                    if len(fv_lst) != 1:
                        print(fv_lst)
                        print('No unique fill value for {}'.format(sv))
                        # without a single fill value the data cannot be
                        # indexed; skip instead of using an unbound or stale
                        # fill_value
                        continue
                    fill_value = fv_lst[0]

                    sv_units = np.unique(vinfo['units']).tolist()

                    t = vinfo['t']
                    if len(t) > 1:
                        data = vinfo['values']
                        [dd_data, g_min, g_max] = index_dataset_2d(r, 'spkir_abj_cspp_downwelling_vector', data, fill_value)
                        t0 = pd.to_datetime(min(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(max(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        plt_deploy = [int(dnum) for dnum in list(np.unique(vinfo['deployments']))]
                        title_2d = (r + '\nDeployments: ' + str(sorted(plt_deploy)) + '\n' + t0 + ' - ' + t1 + '\n'
                                    + 'removed global ranges +/- [{} - {}]'.format(g_min, g_max))

                        sname = '-'.join((r, sv))
                        fig, ax = pf.plot_spkir(t, dd_data, sv, sv_units[0])
                        ax.set_title(title_2d, fontsize=8)
                        _draw_deployment_ends(ax, dend_times, 'k')
                        pf.save_fig(save_dir, sname)

                        # plot each wavelength
                        wavelengths = ['412nm', '443nm', '490nm', '510nm', '555nm', '620nm', '683nm']
                        for wvi in range(len(dd_data)):
                            fig, ax = pf.plot_spkir_wv(t, dd_data[wvi], sv, sv_units[0], wvi)
                            ax.set_title(title_2d, fontsize=8)
                            _draw_deployment_ends(ax, dend_times, 'k')
                            snamewvi = '-'.join((sname, wavelengths[wvi]))
                            pf.save_fig(save_dir, snamewvi)

                elif 'presf_abc_wave_burst' in m:
                    fv_lst = np.unique(vinfo['fv']).tolist()
                    if len(fv_lst) != 1:
                        print(fv_lst)
                        print('No unique fill value for {}'.format(sv))
                        # see the SPKIR branch: skip rather than use an
                        # unbound or stale fill_value
                        continue
                    fill_value = fv_lst[0]

                    sv_units = np.unique(vinfo['units']).tolist()

                    t = vinfo['t']
                    if len(t) > 1:
                        data = vinfo['values']
                        [dd_data, g_min, g_max] = index_dataset_2d(r, 'presf_wave_burst_pressure', data, fill_value)
                        t0 = pd.to_datetime(min(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(max(t)).strftime('%Y-%m-%dT%H:%M:%S')
                        plt_deploy = [int(dnum) for dnum in list(np.unique(vinfo['deployments']))]
                        title_2d = (r + '\nDeployments: ' + str(sorted(plt_deploy)) + '\n' + t0 + ' - ' + t1 + '\n'
                                    + 'removed global ranges +/- [{} - {}]'.format(g_min, g_max))

                        sname = '-'.join((r, sv))
                        fig, ax = pf.plot_presf_2d(t, dd_data, sv, sv_units[0])
                        ax.set_title(title_2d, fontsize=8)
                        _draw_deployment_ends(ax, dend_times, 'k')
                        pf.save_fig(save_dir, sname)

                else:
                    # 2D variables (stored as a dict) and spectra are not
                    # plotted as 1D timeseries
                    if type(vinfo['values']) == dict or 'Spectra' in sv:
                        continue
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    sv_name = vinfo['var_name']
                    t0 = pd.to_datetime(min(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(max(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                    x = vinfo['t']
                    y = vinfo['values']

                    # reject NaNs and values of 0.0
                    nan_ind = (~np.isnan(y)) & (y != 0.0)
                    x_nonan = x[nan_ind]
                    y_nonan = y[nan_ind]

                    # reject fill values
                    fv_ind = y_nonan != vinfo['fv'][0]
                    x_nonan_nofv = x_nonan[fv_ind]
                    y_nonan_nofv = y_nonan[fv_ind]

                    # reject extreme values
                    Ev_ind = cf.reject_extreme_values(y_nonan_nofv)
                    y_nonan_nofv_nE = y_nonan_nofv[Ev_ind]
                    x_nonan_nofv_nE = x_nonan_nofv[Ev_ind]

                    # reject values outside global ranges:
                    global_min, global_max = cf.get_global_ranges(r, sv_name)
                    if any(e is None for e in [global_min, global_max]):
                        y_nonan_nofv_nE_nogr = y_nonan_nofv_nE
                        x_nonan_nofv_nE_nogr = x_nonan_nofv_nE
                    else:
                        gr_ind = cf.reject_global_ranges(y_nonan_nofv_nE, global_min, global_max)
                        y_nonan_nofv_nE_nogr = y_nonan_nofv_nE[gr_ind]
                        x_nonan_nofv_nE_nogr = x_nonan_nofv_nE[gr_ind]

                    if len(y_nonan_nofv) < 1:
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((r, sv))
                    else:
                        sname = '-'.join((r, m, sv))

                    # np.unique returns sorted values, so plt_deploy[0] is the
                    # lowest deployment number present in this variable
                    plt_deploy = [int(dnum) for dnum in list(np.unique(vinfo['deployments']))]
                    title_1d = r + '\nDeployments: ' + str(plt_deploy) + '\n' + t0 + ' - ' + t1
                    time_range_given = type(start_time) == dt.datetime

                    # plot hourly averages for cabled and FDCHP data
                    if 'streamed' in sci_vars_dict[list(sci_vars_dict.keys())[0]]['ms'][0] or 'FDCHP' in r:
                        sname = '-'.join((sname, 'hourlyavg'))
                        df = pd.DataFrame({'dfx': x_nonan_nofv_nE_nogr, 'dfy': y_nonan_nofv_nE_nogr})
                        dfr = df.resample('H', on='dfx').mean()

                        # Plot all data
                        fig, ax = pf.plot_timeseries_all(dfr.index, dfr['dfy'], sv, sv_units, stdev=None)
                        ax.set_title(title_1d, fontsize=8)

                        # if plotting a specific time range, plot deployment lines only for those deployments
                        if time_range_given:
                            ends = [dend_by_deploy[d] for d in plt_deploy if d in dend_by_deploy]
                        else:
                            ends = dend_times
                        _draw_deployment_ends(ax, ends, 'b')
                        pf.save_fig(save_dir, sname)
                    else:
                        # if plotting a specific time range, only the end of
                        # the first deployment in the data is drawn
                        if time_range_given:
                            ends = [dend_by_deploy[d] for d in plt_deploy[:1] if d in dend_by_deploy]
                        else:
                            ends = dend_times

                        # Plot all data
                        fig, ax = pf.plot_timeseries_all(x_nonan_nofv, y_nonan_nofv, sv, sv_units, stdev=None)
                        ax.set_title(title_1d, fontsize=8)
                        _draw_deployment_ends(ax, ends, 'b')
                        pf.save_fig(save_dir, sname)

                        # Plot data with outliers removed
                        fig, ax = pf.plot_timeseries_all(x_nonan_nofv_nE_nogr, y_nonan_nofv_nE_nogr, sv, sv_units,
                                                         stdev=5)
                        ax.set_title(title_1d, fontsize=8)
                        _draw_deployment_ends(ax, ends, 'b')

                        sfile = '_'.join((sname, 'rmoutliers'))
                        pf.save_fig(save_dir, sfile)
# Example no. 13
# 0
def _open_method_dataset(files, column_name, start_time, end_time, handles):
    """
    Open the file(s) for one delivery method and prepare them for comparison.

    files: list of netCDF files/urls for a single method-stream
    column_name: dataframe column label formatted as '<method>-<stream>'
    start_time, end_time: when both are provided, the dataset is sliced to that time range
    handles: list that receives the dataset returned by xarray so the caller can close it when finished
             (the swapped/sliced datasets derived from it are separate objects)
    return: (dataset, science variables, method); dataset is None when the time range contains no data
    """
    if len(files) == 1:
        opened = xr.open_dataset(files[0])
        handles.append(opened)
        ds = opened.swap_dims({'obs': 'time'})
    else:
        opened = xr.open_mfdataset(files)
        handles.append(opened)
        ds = opened.swap_dims({'obs': 'time'})
        ds = ds.chunk({'time': 100})

    parts = str(column_name).split('-')
    sci_vars = cf.return_science_vars(parts[1])
    method = parts[0]

    if start_time is not None and end_time is not None:
        ds = ds.sel(time=slice(start_time, end_time))
        if len(ds['time'].values) == 0:
            print('No {} data to plot for specified time range: ({} to {})'
                  .format(method, start_time, end_time))
            return None, sci_vars, method

    return ds, sci_vars, method


def _compare_method_pair(d, r, name0, f0, name1, f1, start_time, end_time, sDir, strm):
    """
    Plot every variable that two delivery methods have in common (matched on long name).

    d: dataframe index label for the row being compared (used in titles, file names and printed output)
    r: reference designator
    name0, name1: column labels ('<method>-<stream>') of the two methods
    f0, f1: lists of netCDF files/urls for the two methods
    start_time, end_time: optional time range limits
    sDir: root directory for the saved figures
    strm: optional string appended to the figure file names
    """
    compare = '{} {}'.format(name0, name1)
    handles = []
    try:
        ds0, ds0_sci_vars, ds0_method = _open_method_dataset(f0, name0, start_time, end_time, handles)
        if ds0 is None:
            return
        ds1, ds1_sci_vars, ds1_method = _open_method_dataset(f1, name1, start_time, end_time, handles)
        if ds1 is None:
            return

        t0 = ds0['time']
        t1 = ds1['time']

        # find where the variable long names are the same
        ds0names = long_names(ds0, ds0_sci_vars)
        ds0names = ds0names.rename(columns={'name': 'name_ds0'})
        ds1names = long_names(ds1, ds1_sci_vars)
        ds1names = ds1names.rename(columns={'name': 'name_ds1'})
        mapping = pd.merge(ds0names, ds1names, on='long_name', how='inner')
        print('----------------------')
        print('{}: {}'.format(d, compare))
        print('----------------------')

        subsite = r.split('-')[0]
        array = subsite[0:2]
        if start_time is not None and end_time is not None:
            stime = start_time.strftime('%Y-%m-%d')
            etime = end_time.strftime('%Y-%m-%d')
            ext = '-'.join((d, compare)) + '-' + stime + 'to' + etime
        else:
            ext = '-'.join((ds0_method, ds1_method))
        save_dir = os.path.join(sDir, array, subsite, r, 'method_compare_plots', ext)
        cf.create_dir(save_dir)

        title = ' '.join((d, r, '{} vs {}'.format(ds0_method, ds1_method)))

        for rr in mapping.itertuples():
            index, name_ds0, long_name, name_ds1 = rr
            print(long_name)

            ds0_var = ds0[name_ds0]
            ds1_var = ds1[name_ds1]

            # reject NaNs
            ds0_nonan = ds0_var.data[~np.isnan(ds0_var.data)]
            ds1_nonan = ds1_var.data[~np.isnan(ds1_var.data)]

            # only plot if both arrays have data
            if not (len(ds0_nonan) > 0 and len(ds1_nonan) > 0):
                continue

            # first pass plots all data, second pass plots data with outliers (+/- 5 SD) removed
            for stdev, suffix in ((None, None), (5, 'rmoutliers')):
                fig, ax = pf.plot_timeseries_compare(t0, t1, ds0_var, ds1_var, ds0_method, ds1_method,
                                                     long_name, stdev=stdev)
                ax.set_title(title, fontsize=9)

                name_parts = [d, r, long_name]
                if strm:
                    name_parts.append(strm)
                if suffix:
                    name_parts.append(suffix)
                pf.save_fig(save_dir, '_'.join(name_parts))
    finally:
        # release the netCDF file handles whether or not plotting succeeded
        for opened in handles:
            opened.close()


def compare_plot_datasets(df, r, start_time, end_time, sDir, strm=None):
    """
    Create timeseries plots comparing the data delivered by each pair of methods in a dataframe.

    df: dataframe with one column per '<method>-<stream>'; each cell holds either a list of netCDF
        files/urls for that row, or a non-list placeholder (e.g. a float NaN) when there are no files
    r: reference designator (the text before the first '-' is used as the subsite)
    start_time, end_time: objects accepted by xarray time slicing that also provide strftime
        (e.g. datetime), or None; both must be provided to restrict the time range
    sDir: root directory for the saved figures
        (<sDir>/<array>/<subsite>/<r>/method_compare_plots/...)
    strm: optional string appended to the figure file names
    """
    names = df.columns
    for d, row in df.iterrows():
        print('\n{}'.format(d))
        for i, n in enumerate(names):
            if i == 0:
                continue  # the first column has no earlier column to be compared against
            f1 = row[n]
            if not isinstance(f1, list):
                continue
            # compare this column against every column that precedes it
            for x in range(i):
                f0 = row[names[x]]
                if not isinstance(f0, list):
                    continue
                _compare_method_pair(d, r, names[x], f0, n, f1, start_time, end_time, sDir, strm)
Ejemplo n.º 14
0
def main(nc, directory, out, time_break, breakdown):
    """
    Plot a timeseries of each science variable, one set of figures per unique value of ``time_break``.

    nc: path or url of a single .nc file; its attributes (subsite, node, sensor) and mk_str() provide
        the title prefix and the parts of the saved file names
    directory: directory whose *.nc files are opened together as the dataset to plot
    out: root directory to save plots; figures are written to <out>/timeseries/<breakdown>
    time_break: name of the dataset variable whose unique values split the data into separate plots
    breakdown: name of the sub-directory (under <out>/timeseries) for this set of plots
    """
    list_files = directory + "/*.nc"

    with xr.open_dataset(nc, mask_and_scale=False) as ds_ncfile:
        title_pre = mk_str(ds_ncfile.attrs, 't')
        platform = ds_ncfile.subsite
        node = ds_ncfile.node
        sensor = ds_ncfile.sensor

    save_dir = os.path.join(out, 'timeseries', breakdown)
    cf.create_dir(save_dir)

    with xr.open_mfdataset(list_files) as ds:
        # change dimensions from 'obs' to 'time'
        ds = ds.swap_dims({'obs': 'time'})

        # variables whose names contain any of these key words are not plotted
        misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc', 'time', 'mission', 'obs',
                'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm']
        reg_ex = re.compile('|'.join(misc))

        # keep variables that are not in the regular expression
        sci_vars = [s for s in ds.data_vars if not reg_ex.search(s)]

        times = np.unique(ds[time_break])

        for t in times:
            time_ind = t == ds[time_break].data
            time_data = ds['time'].data[time_ind]
            # NOTE(review): the original format string ended in '%00', which is not a valid strftime
            # directive; '%S' (seconds) is assumed to be what was intended - confirm
            t0 = pd.to_datetime(time_data.min()).strftime('%Y-%m-%dT%H%M%S')
            t1 = pd.to_datetime(time_data.max()).strftime('%Y-%m-%dT%H%M%S')

            for var in sci_vars:
                x = dict(data=time_data, info=dict(label='Time', units='GMT'))
                try:
                    sci = ds[var]
                    print(var)
                except UnicodeEncodeError:  # some comments have latex characters
                    ds[var].attrs.pop('comment')  # remove from the attributes
                    sci = ds[var]  # or else the variable won't load

                # label the y-axis with the long name, falling back to the standard name
                try:
                    y_lab = sci.long_name
                except AttributeError:
                    y_lab = sci.standard_name
                y = dict(data=sci.data[time_ind], info=dict(label=y_lab, units=str(sci.units), var=var,
                                                            platform=platform, node=node, sensor=sensor))

                title = title_pre + var

                # plot timeseries with outliers
                fig, ax = pf.auto_plot(x, y, title, stdev=None, line_style='.', g_range=True)
                pf.resize(width=12, height=8.5)  # Resize figure

                save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')
def main(sDir, url_list, start_time, end_time, deployment_num, interval):
    """
    Plot ADCP science variables for each reference designator and deployment found in a list of urls.

    For each plotted time interval the data from all files of a deployment are appended together, then
    each science variable is plotted twice: once as-is and once with outliers removed ('_rmoutliers').

    sDir: root directory for the saved figures (<sDir>/<array>/<subsite>/<refdes>/plots/<deployment>)
    url_list: list of urls; the reference designator is parsed from the second-to-last path element
    start_time, end_time: datetime limits of the data to plot, or None. When either is None the
        deployment dates from the data review database are split into intervals with date_range()
    deployment_num: integer deployment number to plot, or None to plot every deployment
    interval: 1-based number of the single interval to plot, or None to plot every interval
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        deployments = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check != r:
                continue
            udatasets = cf.get_nc_urls([u])
            datasets.append(udatasets)
            for ud in udatasets:
                # the deployment label is the file name up to the first underscore
                deploy_name = ud.split('/')[-1].split('_')[0]
                if deploy_name not in deployments:
                    deployments.append(deploy_name)
        datasets = list(itertools.chain(*datasets))
        datasets = cf.filter_collocated_instruments(r, datasets)
        deployments.sort()

        fdatasets = np.unique(datasets).tolist()
        for deploy in deployments:
            # compare by value: identity ('is not') is only reliable for small cached integers
            if deployment_num is not None and int(deploy[-4:]) != deployment_num:
                print('\nskipping {}'.format(deploy))
                continue

            rdatasets = [s for s in fdatasets if deploy in s]

            # break deployment into 4 segments or make a list of the time range specified
            if start_time is not None and end_time is not None:
                dt_range = [dt.datetime.strftime(start_time, '%Y-%m-%d'), dt.datetime.strftime(end_time, '%Y-%m-%d')]
            else:
                # Get deployment info from the data review database
                dr_data = cf.refdes_datareview_json(r)
                d_info = [x for x in dr_data['instrument']['deployments'] if x['deployment_number'] == int(deploy[-4:])]
                d_info = d_info[0]
                deploy_start = dt.datetime.strptime(str(d_info['start_date']).split('T')[0], '%Y-%m-%d')
                deploy_stop = dt.datetime.strptime(str(d_info['stop_date']).split('T')[0], '%Y-%m-%d') + dt.timedelta(
                    days=1)
                dt_range = list(date_range(deploy_start, deploy_stop, 4))

            if interval is None:
                toplot = range(len(dt_range) - 1)
            else:
                toplot = [interval - 1]

            for dtri in toplot:
                stime = dt.datetime.strptime(dt_range[dtri], '%Y-%m-%d')
                etime = dt.datetime.strptime(dt_range[dtri + 1], '%Y-%m-%d')

                # start every interval with empty accumulators so a plot only contains its own interval
                # and so the science variable entries are rebuilt for every deployment
                sci_vars_dict = {'time': dict(values=np.array([], dtype=np.datetime64), fv=[], ln=[]),
                                 'bin_depths': dict(values=np.array([]), units=[], fv=[], ln=[])}
                percentgood = {'percent_good_beam{}'.format(beam): dict(values=np.array([]))
                               for beam in range(1, 5)}
                sci_vars = None  # resolved from the first file that has data in this interval

                for i, dataset in enumerate(rdatasets):
                    # all values are copied into numpy arrays below, so the file can be closed afterwards
                    with xr.open_dataset(dataset, mask_and_scale=False) as opened:
                        ds = opened.swap_dims({'obs': 'time'})
                        print('\nAppending data from {}: file {} of {}'.format(deploy, i + 1, len(rdatasets)))

                        ds = ds.sel(time=slice(stime, etime))
                        if len(ds['time'].values) == 0:
                            print('No data to plot for specified time range: ({} to {})'.format(stime, etime))
                            continue

                        if sci_vars is None:
                            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(rdatasets[0])
                            array = subsite[0:2]
                            sci_vars = cf.return_science_vars(stream)
                            # drop the following list of key words from science variables list
                            sci_vars = notin_list(sci_vars, ['salinity', 'temperature', 'bin_depths', 'beam'])
                            sci_vars = [name for name in sci_vars if ds[name].units != 'mm s-1']

                            for sci_var in sci_vars:
                                sci_vars_dict.update({sci_var: dict(values=np.array([]), units=[], fv=[], ln=[])})

                        # append data for the deployment into a dictionary
                        for s_v, info in sci_vars_dict.items():
                            print(s_v)
                            vv = ds[s_v]

                            # record each distinct units / fill value / long name found for the variable
                            for attr, key, missing in (('units', 'units', 'no units'),
                                                       ('_FillValue', 'fv', 'no fill value'),
                                                       ('long_name', 'ln', 'no long name')):
                                try:
                                    attr_value = getattr(vv, attr)
                                except AttributeError:
                                    print(missing)
                                    continue
                                if attr_value not in info[key]:
                                    info[key].append(attr_value)

                            if len(vv.dims) == 1:
                                info['values'] = np.append(info['values'], vv.values)
                            elif len(info['values']) == 0:
                                info['values'] = vv.values.T
                            else:
                                # 2-D data are transposed above, so files are joined along axis 1
                                info['values'] = np.concatenate((info['values'], vv.values.T), axis=1)

                        # append percent good beams
                        for j, k in percentgood.items():
                            pgvv = ds[j]
                            fv_pgvv = pgvv._FillValue
                            pgvv = pgvv.values.T.astype(float)
                            pgvv[pgvv == fv_pgvv] = np.nan
                            if len(k['values']) == 0:
                                k['values'] = pgvv
                            else:
                                k['values'] = np.concatenate((k['values'], pgvv), axis=1)

                if len(sci_vars_dict['time']['values']) == 0:
                    continue  # nothing was appended for this interval

                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes, 'plots', deployment)
                cf.create_dir(save_dir)

                tm = sci_vars_dict['time']['values']
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title_text = ' '.join((deployment, refdes, method))
                base_title = title_text + '\n' + t0 + ' - ' + t1

                bd = sci_vars_dict['bin_depths']
                ylabel = 'bin_depths ({})'.format(bd['units'][0])

                print('\nPlotting interval {}'.format(int(dtri) + 1))
                for var in sci_vars:
                    print('----{}'.format(var))
                    v = sci_vars_dict[var]
                    fv = v['fv'][0]
                    v_name = v['ln'][0]
                    units = v['units'][0]

                    if len(np.shape(v['values'])) == 1:
                        v, n_nan, n_fv, n_ev, n_grange, g_min, g_max, n_std = reject_err_data_1_dims(
                            v['values'], fv, r, var, n=5)

                        if len(tm) > np.sum(np.isnan(v)):  # only plot if the array contains values
                            # Plot all data
                            fig, ax = pf.plot_timeseries(tm, v, v_name, stdev=None)
                            ax.set_title(base_title, fontsize=9)
                            sfile = '-'.join((filename, v_name, t0[:10]))
                            pf.save_fig(save_dir, sfile)

                            # Plot data with outliers removed
                            fig, ax = pf.plot_timeseries(tm, v, v_name, stdev=5)
                            title_i = 'removed: {} nans, {} fill values, {} extreme values, {} GR [{}, {}],' \
                                      ' {} outliers +/- 5 SD'.format(n_nan, n_fv, n_ev, n_grange, g_min, g_max, n_std)

                            ax.set_title((base_title + '\n' + title_i), fontsize=8)
                            sfile = '-'.join((filename, v_name, t0[:10])) + '_rmoutliers'
                            pf.save_fig(save_dir, sfile)
                        else:
                            print('Array of all nans - skipping plot')

                    else:
                        v, n_nan, n_fv, n_ev, n_bb, n_grange, g_min, g_max = reject_err_data_2_dims(
                            v['values'], percentgood, fv, r, var)

                        clabel = '{} ({})'.format(var, units)

                        # check bin depths for extreme values
                        y = bd['values']
                        # if all the values are negative, take the absolute value (cabled data bin depths are negative)
                        if int(np.nanmin(y)) < 0 and int(np.nanmax(y)) < 0:
                            y = abs(y)
                        y_nan = np.sum(np.isnan(y))
                        y = np.where(y < 6000, y, np.nan)  # replace extreme bin_depths by nans
                        bin_nan = np.sum(np.isnan(y)) - y_nan
                        bin_title = 'removed: {} bin depths > 6000'.format(bin_nan)

                        if 'echo' in var:
                            color = 'BuGn'
                        else:
                            color = 'RdBu'

                        new_y = dropna(y, axis=1)  # convert to DataFrame to drop nan
                        y_mask = new_y.loc[list(new_y.index), list(new_y.columns)]
                        v_new = pd.DataFrame(v)
                        v_mask = v_new.loc[list(new_y.index), list(new_y.columns)]
                        tm_mask = tm[new_y.columns]

                        # plot all data
                        fig, ax, __ = pf.plot_adcp(tm_mask, np.array(y_mask), np.array(v_mask), ylabel, clabel, color,
                                                   n_stdev=None)

                        if bin_nan > 0:
                            ax.set_title((base_title + '\n' + bin_title), fontsize=8)
                        else:
                            ax.set_title(base_title, fontsize=8)

                        sfile = '-'.join((filename, var, t0[:10]))
                        pf.save_fig(save_dir, sfile)

                        # plot data with outliers removed
                        fig, ax, n_nans_all = pf.plot_adcp(tm_mask, np.array(y_mask), np.array(v_mask), ylabel, clabel,
                                                           color, n_stdev=5)
                        title_i = 'removed: {} nans, {} fill values, {} extreme values, {} bad beams, {} GR [{}, {}]'.format(
                            n_nan, n_fv, n_ev, n_bb, n_grange, g_min, g_max)

                        if bin_nan > 0:
                            ax.set_title((base_title + '\n' + title_i + '\n' + bin_title), fontsize=8)
                        else:
                            ax.set_title((base_title + '\n' + title_i), fontsize=8)

                        sfile = '-'.join((filename, var, t0[:10])) + '_rmoutliers'
                        pf.save_fig(save_dir, sfile)
Ejemplo n.º 16
0
    tel_long_name.append(tv_longname)  # list of telemetered long names

# table of telemetered variable names and their long names
tel_df = pd.DataFrame(dict(tel_name=tel_name, long_name=tel_long_name))

# pair up the recovered and telemetered variables that share a long name
mapping = rec_df.merge(tel_df, on='long_name', how='inner')

# one comparison figure per matched pair of variables
for index, long_name, rec_name, tel_name in mapping.itertuples():
    r_var = rec[rec_name]
    r_data = r_var.data

    t_var = tel[tel_name]
    t_data = t_var.data

    time_rec = rec['time'].data
    time_tel = tel['time'].data

    # instrument identifiers shared by every info dictionary
    inst = dict(platform=platform, node=node, sensor=sensor)

    # recovered data
    x1 = dict(data=time_rec, info=dict(inst, units='GMT', label='Time', var=rec_name))
    y1 = dict(data=r_data, info=dict(inst, label=long_name, units=r_var.units, var=rec_name))

    # telemetered data
    x2 = dict(data=time_tel, info=dict(inst, units='GMT', label='Time', var=tel_name))
    y2 = dict(data=t_data, info=dict(inst, label=long_name, units=t_var.units, var=tel_name))

    fig, ax = pf.compare_timeseries(x1, y1, x2, y2, g_range=True)

    title_text = '{}\nVariable: {}\ntelemetered ({}) vs {} ({})'.format(
        title, long_name, tel_name, rec_method, rec_name)
    plt.title(title_text, fontsize=10)
    pf.resize(width=12, height=8.5)  # Resize figure
    save_name = '{}_{}'.format(title, long_name)
    pf.save_fig(save_dir, save_name, res=150)  # Save figure
    plt.close('all')
Ejemplo n.º 17
0
              info=dict(platform=platform,
                        node=node,
                        sensor=sensor,
                        label=long_name,
                        units=r_var.units,
                        var=rec_name))

    x2 = dict(data=time_tel,
              info=dict(platform=platform,
                        node=node,
                        sensor=sensor,
                        units='GMT',
                        label='Time',
                        var=tel_name))
    y2 = dict(data=t_data,
              info=dict(platform=platform,
                        node=node,
                        sensor=sensor,
                        label=long_name,
                        units=t_var.units,
                        var=tel_name))

    fig, ax = pf.compare_timeseries(x1, y1, x2, y2, g_range=True)

    title_text = '{}\nVariable: {}\ntelemetered ({}) vs {} ({})'.format(
        title, long_name, tel_name, rec_method, rec_name)
    plt.title(title_text, fontsize=10)
    pf.resize(width=12, height=8.5)  # Resize figure
    save_name = '{}_{}'.format(title, long_name)
    pf.save_fig(save_dir, save_name, res=150)  # Save figure
    plt.close('all')
Ejemplo n.º 18
0
def _spkir_fill_to_nan(vv):
    """
    Return the transposed values of a variable with its fill values converted to NaN.

    vv: data array with a _FillValue attribute
    return: (values, number of NaNs present before the conversion, number of fill values converted)
    """
    fv = vv._FillValue
    # work on a copy so the NaNs written here (and by the global range test) are not written
    # back into the array held by the dataset the variable came from
    v = vv.values.T.copy()  # .T = transpose 2D array
    n_nan = np.sum(np.isnan(v))

    # convert fill values to nans
    v[v == fv] = np.nan
    n_fv = np.sum(np.isnan(v)) - n_nan
    return v, n_nan, n_fv


def _spkir_reject_global_range(v, r, var, n_nan, n_fv):
    """
    Replace (in place) the values outside of the variable's global ranges with NaN.

    v: array of values, modified in place
    r: reference designator
    var: variable name
    n_nan, n_fv: counts of the NaNs already in v that came from original NaNs and from fill values
    return: (number of values rejected, or 'no global ranges' when none are defined, g_min, g_max)
    """
    [g_min, g_max] = cf.get_global_ranges(r, var)
    if g_min is not None and g_max is not None:
        v[v < g_min] = np.nan
        v[v > g_max] = np.nan
        n_grange = np.sum(np.isnan(v)) - n_fv - n_nan
    else:
        n_grange = 'no global ranges'
    return n_grange, g_min, g_max


def main(sDir, url_list, start_time, end_time, preferred_only):
    """
    Plot the science variables of each dataset with pf.plot_spkir: the entire file before and after
    the global range test, then one plot per calendar month after the global range test.

    sDir: root directory for the saved figures (<sDir>/<array>/<subsite>/<refdes>/timeseries_plots)
    url_list: list of urls; the reference designator is parsed from the second-to-last path element
    start_time, end_time: limits of the data to plot, or None; both must be provided to restrict
        the time range
    preferred_only: 'yes' to plot only the datasets that match the preferred stream information,
        any other value to plot every dataset found
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        fdatasets = np.unique(fdatasets).tolist()
        for fd in fdatasets:
            # the context manager closes the file once all of its plots have been saved
            with xr.open_dataset(fd, mask_and_scale=False) as opened:
                ds = opened.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    fd)
                sci_vars = cf.return_science_vars(stream)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        'timeseries_plots')
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                # -------- plot entire deployment --------

                for var in sci_vars:
                    print(var)
                    vv = ds[var]
                    v, n_nan, n_fv = _spkir_fill_to_nan(vv)

                    # plot before global ranges are removed
                    fig, ax = pf.plot_spkir(tm, v, vv.name, vv.units)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                    sfile = '-'.join((filename, var, t0[:10]))
                    pf.save_fig(save_dir, sfile)

                    # reject data outside of global ranges
                    n_grange, g_min, g_max = _spkir_reject_global_range(v, r, var, n_nan, n_fv)

                    # plot after global ranges are removed
                    fig, ax = pf.plot_spkir(tm, v, vv.name, vv.units)
                    title2 = 'removed: {} global ranges [{}, {}]'.format(
                        n_grange, g_min, g_max)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\n' + title2),
                                 fontsize=9)
                    sfile = '-'.join((filename, var, t0[:10], 'rmgr'))
                    pf.save_fig(save_dir, sfile)

                # -------- break the deployment into months and plot --------

                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        'timeseries_plots', 'monthly')
                cf.create_dir(save_dir)

                # create list of start and end dates: step through the data one day at a time and
                # close a month (and open the next one) whenever the following day is in a new month
                dt_start = dt.datetime.strptime(t0, '%Y-%m-%dT%H:%M:%S')
                dt_end = dt.datetime.strptime(t1, '%Y-%m-%dT%H:%M:%S')
                start_dates = [dt_start.strftime('%m-%d-%YT00:00:00')]
                end_dates = []
                ts1 = dt_start
                while ts1 <= dt_end:
                    ts2 = ts1 + dt.timedelta(days=1)
                    if ts2.month != ts1.month:
                        start_dates.append(ts2.strftime('%m-%d-%YT00:00:00'))
                        end_dates.append(ts1.strftime('%m-%d-%YT23:59:59'))
                    ts1 = ts2
                end_dates.append(dt_end.strftime('%m-%d-%YT23:59:59'))

                for sd, ed in zip(start_dates, end_dates):
                    sd_format = dt.datetime.strptime(sd, '%m-%d-%YT%H:%M:%S')
                    ed_format = dt.datetime.strptime(ed, '%m-%d-%YT%H:%M:%S')
                    ds_month = ds.sel(time=slice(sd_format, ed_format))
                    if len(ds_month['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(sd, ed))
                        continue
                    tm_month = ds_month['time'].values
                    t0_month = pd.to_datetime(tm_month.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    t1_month = pd.to_datetime(tm_month.max()).strftime('%Y-%m-%dT%H:%M:%S')

                    for var in sci_vars:
                        print(var)
                        vv = ds_month[var]
                        v, n_nan, n_fv = _spkir_fill_to_nan(vv)

                        # reject data outside of global ranges
                        n_grange, g_min, g_max = _spkir_reject_global_range(v, r, var, n_nan, n_fv)

                        # plot after global ranges are removed
                        fig, ax = pf.plot_spkir(tm_month, v, vv.name, vv.units)
                        title2 = 'removed: {} global ranges [{}, {}]'.format(
                            n_grange, g_min, g_max)
                        ax.set_title(
                            (title + '\n' + t0_month + ' - ' + t1_month + '\n' + title2),
                            fontsize=9)
                        sfile = '-'.join((filename, var, t0_month[:7], 'rmgr'))
                        pf.save_fig(save_dir, sfile)
Ejemplo n.º 19
0
def main(sDir, url_list, start_time, end_time, preferred_only):
    """
    Plot a timeseries of every selected variable in each dataset, once with all data and once
    with outliers removed (stdev=5).

    sDir: base directory for the plots; figures are saved under
        <sDir>/<array>/<subsite>/<refdes>/timeseries_plots/<deployment>
    url_list: list of catalog URLs. The reference designator is built from the dash-separated
        fields 1-4 of the second-to-last path element of each URL.
    start_time, end_time: bounds used to slice each dataset along time; the slice is applied
        only when both are not None
    preferred_only: the string 'yes' keeps only the files matching the preferred
        method-stream(s) of each deployment; any other value keeps every file found
    """
    # unique reference designators, in order of first appearance
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))

        # collect the data files from every URL belonging to this reference designator
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # row[ii] is not a string (presumably an empty cell) - nothing to match
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        # np.unique drops duplicate files and returns them sorted
        fdatasets = np.unique(fdatasets).tolist()
        main_sensor = r.split('-')[-1]
        fdatasets = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets:
            if '_blank' in fd:
                continue

            # Open inside a context manager so the file handle is released for every dataset,
            # including when the requested time range is empty and the file is skipped.
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                # data variables plus any pressure variable stored as a coordinate
                ds_vars = list(ds.data_vars.keys()) + [x for x in ds.coords.keys() if 'pressure' in x]

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                # named plot_vars (not vars) so the builtin is not shadowed
                if 'NUTNR' in refdes:
                    plot_vars = cf.return_science_vars(stream)
                else:
                    plot_vars = cf.return_raw_vars(ds_vars)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                for var in plot_vars:
                    print(var)
                    if var == 'id':
                        continue

                    y = ds[var]
                    try:
                        fv = y._FillValue
                    except AttributeError:
                        # no fill value declared: comparing against NaN below keeps every point
                        fv = np.nan

                    # only one-dimensional variables are plotted
                    if len(y.dims) != 1:
                        continue

                    # Check if the array is all NaNs
                    if np.isnan(y.values).all():
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    ind = y.values != fv
                    if not ind.any():
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject fill values
                    t = tm[ind]
                    y = y[ind]

                    # Plot all data
                    fig, ax = pf.plot_timeseries(t, y, y.name, stdev=None)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                    sfile = '-'.join((filename, y.name, t0[:10]))
                    pf.save_fig(save_dir, sfile)

                    # Plot data with outliers removed
                    fig, ax = pf.plot_timeseries(t, y, y.name, stdev=5)
                    ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                    sfile = '-'.join((filename, y.name, t0[:10])) + '_rmoutliers'
                    pf.save_fig(save_dir, sfile)
def main(files, out, time_break, depth, start, end, interactive):
    """
    Plot time vs. depth cross-sections, colored by each science variable, for every dataset.

    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    time_break: name of a dataset variable used to split the data; one figure per unique value of that variable is made for each science variable. If None, the dataset is sliced from start to end and plotted as a whole.
    depth: name of the dataset variable plotted on the y-axis (labelled 'Pressure', dbar)
    start, end: bounds of the time slice used when time_break is None
    interactive: if True, each figure is shown with point picking enabled instead of being saved
    """
    ext = os.path.splitext(files)[1]
    # Exact match on the extension. A substring test (ext in '.nc') is also true for an empty
    # extension, which made an extension-less text file of links be opened as a dataset.
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    # name fragments that mark a variable as non-science; compiled once for all files
    misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc',  'time', 'mission', 'obs',
            'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm', 'light']
    reg_ex = re.compile('|'.join(misc))

    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds:
            # change dimensions from 'obs' to 'time'
            ds = ds.swap_dims({'obs': 'time'})
            ds_variables = ds.data_vars.keys()  # List of dataset variables
            title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
            save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
            platform = ds.subsite
            node = ds.node
            sensor = ds.sensor
            save_dir = os.path.join(out, ds.subsite, ds.subsite + '-' + ds.node + '-' + ds.sensor, ds.stream, 'xsection_depth_profiles')
            cf.create_dir(save_dir)

            #  keep variables that are not in the regular expression
            sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

            if time_break is not None:
                times = np.unique(ds[time_break])

                for t in times:
                    # boolean mask selecting the samples that belong to this segment
                    time_ind = t == ds[time_break].data
                    for var in sci_vars:
                        x = dict(data=ds['time'].data[time_ind],
                                 info=dict(label='Time', units='GMT'))
                        # NOTE(review): '%0' is not a standard strftime directive, so the tail of
                        # this format is platform-dependent - confirm the intended file-name stamp
                        t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%00')
                        t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%00')
                        try:
                            sci = ds[var]
                            print(var)
                        except UnicodeEncodeError:  # some comments have latex characters
                            ds[var].attrs.pop('comment')  # remove from the attributes
                            sci = ds[var]  # or else the variable won't load

                        y = dict(data=ds[depth].data[time_ind], info=dict(label='Pressure', units='dbar', var=var,
                                                                          platform=platform, node=node, sensor=sensor))

                        # prefer the long name for the color label, fall back to the standard name
                        try:
                            z_lab = sci.long_name
                        except AttributeError:
                            z_lab = sci.standard_name
                        z = dict(data=sci.data[time_ind], info=dict(label=z_lab, units=str(sci.units), var=var,
                                                                    platform=platform, node=node, sensor=sensor))

                        title = title_pre + var

                        # plot timeseries with outliers
                        fig, ax = pf.depth_glider_cross_section(x, y, z, title=title)

                        if interactive == True:
                            fig.canvas.mpl_connect('pick_event', lambda event: pf.onpick3(event, x['data'], y['data'], z['data']))
                            plt.show()
                        else:
                            pf.resize(width=12, height=8.5)  # Resize figure
                            save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                            pf.save_fig(save_dir, save_name, res=150)  # Save figure
                            plt.close('all')

            else:
                ds = ds.sel(time=slice(start, end))

                for var in sci_vars:
                    x = dict(data=ds['time'].data[:],
                             info=dict(label='Time', units='GMT'))
                    # NOTE(review): '%0' is not a standard strftime directive, so the tail of
                    # this format is platform-dependent - confirm the intended file-name stamp
                    t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%00')
                    t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%00')
                    try:
                        sci = ds[var]
                        print(var)
                    except UnicodeEncodeError:  # some comments have latex characters
                        ds[var].attrs.pop('comment')  # remove from the attributes
                        sci = ds[var]  # or else the variable won't load

                    y = dict(data=ds[depth].data[:], info=dict(label='Pressure', units='dbar', var=var,
                                                               platform=platform, node=node, sensor=sensor))

                    # prefer the long name for the color label, fall back to the standard name
                    try:
                        z_lab = sci.long_name
                    except AttributeError:
                        z_lab = sci.standard_name
                    z = dict(data=sci.data[:], info=dict(label=z_lab, units=sci.units, var=var,
                                                         platform=platform, node=node, sensor=sensor))

                    title = title_pre + var

                    # plot timeseries with outliers
                    fig, ax = pf.depth_glider_cross_section(x, y, z, title=title, interactive=interactive)

                    if interactive == True:
                        fig.canvas.mpl_connect('pick_event', lambda event: pf.onpick3(event, x['data'], y['data'], z['data']))
                        plt.show()
                    else:
                        pf.resize(width=12, height=8.5)  # Resize figure
                        save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                        pf.save_fig(save_dir, save_name, res=150)  # Save figure
                        plt.close('all')
Ejemplo n.º 21
0
def main(url_list, sDir, mDir, zcell_size, zdbar, start_time, end_time, inpercentile):
    """
    Write per-depth-bin statistics to a csv file and save a cross-section plot for each science
    variable of every reference designator found in url_list.

    url_list : list of paths to instrument data by methods
    sDir : path to the directory on your machine to save plots
    mDir : path to the directory on your machine to save data ranges
    zcell_size : depth cell size to group data
    zdbar : define depth where suspect data are identified
    start_time : select start date to slice timeseries
    end_time : select end date to slice timeseries
    inpercentile : percentile passed to the cross-section plot; the plot title reports it as the
        upper and lower percentile used to color the data
    """
    # unique reference designators and method-streams, in order of first appearance
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get science variable long names from the Data Review Database
        stream_sci_vars = cd.sci_var_long_names(r)

        # check if the science variable long names are the same for each stream and initialize empty arrays
        sci_vars_dict0 = cd.sci_var_long_names_check(stream_sci_vars)

        # get the list of data files and filter out collocated instruments and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # select the list of data files from the preferred dataset for each deployment
        fdatasets_final = []
        for index, row in ps_df.iterrows():
            for x in fdatasets:
                if row['deployment'] in x and row[0] in x:
                    fdatasets_final.append(x)

        # build dictionary of science data from the preferred dataset for each deployment
        print('\nAppending data from files')
        sci_vars_dict, y_unit, y_name, l0 = cd.append_evaluated_science_data(
                                sDir, ps_df, n_streams, r, fdatasets_final, sci_vars_dict0, zdbar, start_time, end_time)

        # get end times of deployments
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy = row['deployment']
            deploy_info = cf.get_deployment_information(dr_data, int(deploy[-4:]))
            deployments.append(int(deploy[-4:]))
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # comma-separated deployment numbers written next to every statistics row;
        # identical for all variables, so it is built once here
        t_deploy = ', '.join(str(d) for d in deployments)

        # create data range output folders
        save_dir_stat = os.path.join(mDir, array, subsite)
        cf.create_dir(save_dir_stat)
        # create plots output folder
        save_fdir = os.path.join(sDir, array, subsite, r, 'data_range')
        cf.create_dir(save_fdir)

        # statistics tables, one per variable, concatenated when the csv file is written
        stat_frames = []

        # create data ranges csv file and figures
        for m, n in sci_vars_dict.items():
            for sv, vinfo in n['vars'].items():
                print('\n' + vinfo['var_name'])
                if len(vinfo['t']) < 1:
                    print('no variable data to plot')
                    continue

                sv_units = vinfo['units'][0]
                fv = vinfo['fv'][0]
                t = vinfo['t']
                z = vinfo['values']
                y = vinfo['pressure']

                # Check if the array is all NaNs
                if sum(np.isnan(z)) == len(z):
                    print('Array of all NaNs - skipping plot.')
                    continue
                # Check if the array is all fill values
                if len(z[z != fv]) == 0:
                    print('Array of all fill values - skipping plot.')
                    continue
                # nothing can be binned or plotted without pressure data
                if len(y) == 0:
                    continue

                if m == 'common_stream_placeholder':
                    sname = '-'.join((vinfo['var_name'], r))
                else:
                    sname = '-'.join((vinfo['var_name'], r, m))

                # create data ranges for non - pressure data only
                if 'pressure' not in vinfo['var_name']:
                    columns = ['tsec', 'dbar', str(vinfo['var_name'])]
                    # create depth ranges
                    min_r = int(round(min(y) - zcell_size))
                    max_r = int(round(max(y) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    # group data by depth
                    groups, d_groups = gt.group_by_depth_range(t, y, z, columns, ranges)

                    print('writing data ranges for {}'.format(vinfo['var_name']))
                    stat_data = groups.describe()[vinfo['var_name']]
                    stat_data.insert(loc=0, column='parameter', value=sv, allow_duplicates=False)
                    stat_data.insert(loc=1, column='deployments', value=t_deploy, allow_duplicates=False)

                    # DataFrame.append was removed in pandas 2.0; the tables are collected here
                    # and joined with pd.concat before writing
                    stat_frames.append(stat_data)

                # plot full time range free from errors and suspect data
                clabel = sv + " (" + sv_units + ")"
                ylabel = (y_name[0][0] + " (" + y_unit[0][0] + ")")

                t_eng = None
                m_water_depth = None

                # plot non-erroneous -suspect data
                fig, ax, bar = pf.plot_xsection(subsite, t, y, z, clabel, ylabel, t_eng, m_water_depth,
                                                inpercentile, stdev=None)

                title0 = 'Data colored using the upper and lower {} percentile.'.format(inpercentile)
                ax.set_title(r+'\n'+title0, fontsize=9)
                # NOTE(review): the value reported is len(t)/l0 as a percentage while the text
                # says "removed" - confirm what l0 counts and whether this should be the complement
                leg_text = ('{} % erroneous values removed after Human In the Loop review'.format(
                                                                                            (len(t)/l0) * 100),)
                ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)

                # mark the end of every deployment on the plot
                for end_time_ii, deployment_ii in zip(end_times, deployments):
                    ax.axvline(x=end_time_ii, color='b', linestyle='--', linewidth=.8)
                    ax.text(end_time_ii, min(y)-5, 'End' + str(deployment_ii),
                            fontsize=6, style='italic',
                            bbox=dict(boxstyle='round',
                                      ec=(0., 0.5, 0.5),
                                      fc=(1., 1., 1.),
                                      ))

                # fig.tight_layout()
                sfile = '_'.join(('data_range', sname))
                pf.save_fig(save_fdir, sfile)

            # write stat file (rewritten after every stream with everything accumulated so far)
            stat_df = pd.concat(stat_frames) if stat_frames else pd.DataFrame()
            stat_df.to_csv('{}/{}_data_ranges.csv'.format(save_dir_stat, r), index=True, float_format='%11.6f')
                                            title2 = 'Cruise CTD file: {} Date: {}'.format(
                                                CTDfile.split('/')[-1],
                                                dt.datetime.strftime(
                                                    cast_start,
                                                    '%Y-%m-%dT%H:%M:%S'))
                                            title3 = 'Platform: from {} to {}'.format(
                                                str(ds['time'].values[0])[:19],
                                                str(ds['time'].values[-1])
                                                [:19])
                                            fig.suptitle(
                                                (title1 + '\n' + title2 +
                                                 '\n' + title3),
                                                fontsize=8.5)
                                            sfile = '{}_{}_shipCTDcompare_{}'.format(
                                                refdes, deployment, pvarname)
                                            pf.save_fig(save_dir, sfile)
                                            plt.close()
                                        else:
                                            print(
                                                'No platform data available for Shipboard CTD time frame'
                                            )

                            if 'FLOR' in ds.sensor:
                                if 'MOAS' in ds.subsite:
                                    if 'FLORTM' in ds.sensor:
                                        chlname = 'sci_flbbcd_chlor_units'
                                    else:
                                        chlname = 'sci_flbb_chlor_units'
                                else:
                                    chlname = 'fluorometric_chlorophyll_a'
                                pchla = ds[chlname]
Ejemplo n.º 23
0
def _apply_mask(mask, *arrays):
    """Index every array with the same mask so they stay aligned element-for-element."""
    return tuple(a[mask] for a in arrays)


def main(sDir, url_list, start_time, end_time, preferred_only):
    """
    Save a temperature-salinity diagram, with density contours and points colored by time,
    for each dataset.

    sDir: base directory for the plots; figures are saved under
        <sDir>/<array>/<subsite>/<refdes>/ts_plots
    url_list: list of catalog URLs. The reference designator is built from the dash-separated
        fields 1-4 of the second-to-last path element of each URL.
    start_time, end_time: bounds used to slice each dataset along time; the slice is applied
        only when both are not None
    preferred_only: the string 'yes' keeps only the files matching the preferred
        method-stream(s) of each deployment; any other value keeps every file found
    """
    # unique reference designators, in order of first appearance
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))

        # collect the data files from every URL belonging to this reference designator
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        if preferred_only == 'yes':
            fdatasets = []
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # row[ii] is not a string (presumably an empty cell) - nothing to match
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        for fd in fdatasets:
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                save_dir = os.path.join(sDir, array, subsite, refdes, 'ts_plots')
                cf.create_dir(save_dir)

                tme = ds['time'].values
                t0 = pd.to_datetime(tme.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tme.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))
                filename = '-'.join(('_'.join(fname.split('_')[:-1]), 'ts', t0[:10]))

                ds_vars = list(ds.data_vars.keys())
                raw_vars = cf.return_raw_vars(ds_vars)

                xvar = return_var(ds, raw_vars, 'salinity', 'Practical Salinity')
                sal = ds[xvar].values
                sal_fv = ds[xvar]._FillValue

                yvar = return_var(ds, raw_vars, 'temp', 'Seawater Temperature')
                temp = ds[yvar].values
                temp_fv = ds[yvar]._FillValue

                # pressure is looked up in the coordinates first, then in the data variables
                press = pf.pressure_var(ds, list(ds.coords.keys()))
                if press is None:
                    press = pf.pressure_var(ds, list(ds.data_vars.keys()))
                p = ds[press].values

                # Every filter below is applied to all four arrays so they stay aligned.

                # get rid of nans, 0.0s, fill values
                keep = (~np.isnan(sal)) & (sal != 0.0) & (sal != sal_fv)
                sal, temp, tme, p = _apply_mask(keep, sal, temp, tme, p)
                keep = (~np.isnan(temp)) & (temp != 0.0) & (temp != temp_fv)
                sal, temp, tme, p = _apply_mask(keep, sal, temp, tme, p)

                # reject values outside global ranges (skipped when a bound is missing)
                global_min, global_max = cf.get_global_ranges(r, xvar)
                if global_min is not None and global_max is not None:
                    keep = cf.reject_global_ranges(sal, global_min, global_max)
                    sal, temp, tme, p = _apply_mask(keep, sal, temp, tme, p)

                global_min, global_max = cf.get_global_ranges(r, yvar)
                if global_min is not None and global_max is not None:
                    keep = cf.reject_global_ranges(temp, global_min, global_max)
                    sal, temp, tme, p = _apply_mask(keep, sal, temp, tme, p)

                # get rid of outliers
                keep = cf.reject_outliers(sal, 5)
                sal, temp, tme, p = _apply_mask(keep, sal, temp, tme, p)

                keep = cf.reject_outliers(temp, 5)
                sal, temp, tme, p = _apply_mask(keep, sal, temp, tme, p)

                if len(sal) == 0:  # no data left to plot
                    continue

                colors = cm.rainbow(np.linspace(0, 1, len(tme)))

                # Figure out boundaries (mins and maxes); the padding fraction depends on the
                # spread of the data
                sal_lo, sal_hi = sal.min(), sal.max()
                temp_lo, temp_hi = temp.min(), temp.max()
                sal_span = sal_hi - sal_lo
                temp_span = temp_hi - temp_lo

                if sal_span < 0.2:
                    smin = sal_lo - (0.0005 * sal_lo)
                    smax = sal_hi + (0.0005 * sal_hi)
                else:
                    smin = sal_lo - (0.001 * sal_lo)
                    smax = sal_hi + (0.001 * sal_hi)

                if temp_span <= 1:
                    tmin = temp_lo - (0.01 * temp_lo)
                    tmax = temp_hi + (0.01 * temp_hi)
                elif 1 < temp_span < 1.5:
                    tmin = temp_lo - (0.05 * temp_lo)
                    tmax = temp_hi + (0.05 * temp_hi)
                else:
                    tmin = temp_lo - (0.1 * temp_lo)
                    tmax = temp_hi + (0.1 * temp_hi)

                # Calculate how many gridcells are needed in the x and y directions and
                # Create temp and sal vectors of appropriate dimensions
                xdim = int(round((smax-smin)/0.1 + 1, 0))
                if xdim == 1:
                    xdim = 2
                si = np.linspace(0, xdim - 1, xdim) * 0.1 + smin

                if 1.1 <= temp_span < 1.7:  # if the diff between min and max temp is small
                    ydim = int(round((tmax-tmin)/0.75 + 1, 0))
                    ti = np.linspace(0, ydim - 1, ydim) * 0.75 + tmin
                elif temp_span < 1.1:
                    ydim = int(round((tmax - tmin) / 0.1 + 1, 0))
                    ti = np.linspace(0, ydim - 1, ydim) * 0.1 + tmin
                else:
                    ydim = int(round((tmax - tmin) + 1, 0))
                    ti = np.linspace(0, ydim - 1, ydim) + tmin

                # Create empty grid of zeros
                mdens = np.zeros((ydim, xdim))

                # Loop to fill in grid with densities, calculated using the median pressure
                # value (computed once; it does not change inside the loop)
                p_median = np.median(p)
                for j in range(ydim):
                    for i in range(xdim):
                        mdens[j, i] = gsw.density.rho(si[i], ti[j], p_median)

                fig, ax = pf.plot_ts(si, ti, mdens, sal, temp, colors)

                ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\ncolors = time (cooler: earlier)'), fontsize=9)
                leg_text = ('Removed {} values (SD=5)'.format(len(ds[xvar].values) - len(sal)),)
                ax.legend(leg_text, loc='best', fontsize=6)
                pf.save_fig(save_dir, filename)
def main(sDir, ncdir):
    """
    Plot full-record timeseries of each science variable stored in local NetCDF files.

    Two figures are saved per variable: one with NaNs and fill values removed, and one
    (file name suffixed with 'rmoutliers') that additionally drops extreme values and
    values outside the global ranges and is passed to pf.plot_timeseries_all with
    stdev=5. Dashed vertical lines are drawn at the deployment end times.

    sDir: base directory for plots; figures are written to
        <sDir>/<array>/<subsite>/<refdes>/timeseries_plots_all
    ncdir: directory searched recursively for .nc files. The reference designator is
        taken from the second-to-last '/'-separated element of this path.
    """
    rd_list = [ncdir.split('/')[-2]]

    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]

        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get end times of deployments
        dr_data = cf.refdes_datareview_json(r)
        deployments = []
        end_times = []
        for index, row in ps_df.iterrows():
            deploy_num = int(row['deployment'][-4:])
            deploy_info = get_deployment_information(dr_data, deploy_num)
            deployments.append(deploy_num)
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # Collect every .nc file below ncdir. Full paths are stored because os.walk
        # recurses: a bare file name joined to ncdir would not resolve for files that
        # live in a sub-directory.
        fdatasets = []
        for root, dirs, files in os.walk(ncdir):
            for f in files:
                if f.endswith('.nc'):
                    fdatasets.append(os.path.join(root, f))

        # Build '<method>-<stream>' keys from the file names only (never from the
        # directory part, which may itself contain '-' separated tokens).
        methodstream = []
        for f in fdatasets:
            name_parts = os.path.basename(f).split('-')
            stream_words = name_parts[-2].split('_')
            strm = '_'.join((stream_words[0], stream_words[1]))
            methodstream.append('-'.join((name_parts[-3], strm)))

        for ms in np.unique(methodstream):
            fdatasets_sel = [p for p in fdatasets if ms in os.path.basename(p)]
            save_dir = os.path.join(sDir, array, subsite, r, 'timeseries_plots_all')
            cf.create_dir(save_dir)

            # science parameters (and their database units) listed for this method-stream
            stream_sci_vars_dict = dict()
            for dstream in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((dstream['method'], dstream['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for param in dstream['stream']['parameters']:
                        if param['data_product_type'] == 'Science Data':
                            sci_vars.update({param['name']: dict(db_units=param['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict, ms)
            print('\nAppending data from files: {}'.format(ms))
            for fd in fdatasets_sel:
                # the context manager closes the file once its values have been copied
                with xr.open_dataset(fd, mask_and_scale=False) as ds:
                    for var in list(sci_vars_dict[ms]['vars'].keys()):
                        sh = sci_vars_dict[ms]['vars'][var]
                        # only append data whose units match the database units
                        if ds[var].units == sh['db_units']:
                            if ds[var]._FillValue not in sh['fv']:
                                sh['fv'].append(ds[var]._FillValue)
                            if ds[var].units not in sh['units']:
                                sh['units'].append(ds[var].units)
                            sh['t'] = np.append(sh['t'], ds['time'].values)
                            sh['values'] = np.append(sh['values'], ds[var].values)

            print('\nPlotting data')
            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    t0 = pd.to_datetime(min(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(max(vinfo['t'])).strftime('%Y-%m-%dT%H:%M:%S')
                    x = vinfo['t']
                    y = vinfo['values']

                    # reject NaNs
                    nan_ind = ~np.isnan(y)
                    x_nonan = x[nan_ind]
                    y_nonan = y[nan_ind]

                    # reject fill values (only the first recorded fill value is checked)
                    fv_ind = y_nonan != vinfo['fv'][0]
                    x_nonan_nofv = x_nonan[fv_ind]
                    y_nonan_nofv = y_nonan[fv_ind]

                    # reject extreme values
                    Ev_ind = cf.reject_extreme_values(y_nonan_nofv)
                    y_nonan_nofv_nE = y_nonan_nofv[Ev_ind]
                    x_nonan_nofv_nE = x_nonan_nofv[Ev_ind]

                    # reject values outside global ranges (when ranges are available)
                    global_min, global_max = cf.get_global_ranges(r, sv)
                    if global_min is not None and global_max is not None:
                        gr_ind = cf.reject_global_ranges(y_nonan_nofv_nE, global_min, global_max)
                        y_nonan_nofv_nE_nogr = y_nonan_nofv_nE[gr_ind]
                        x_nonan_nofv_nE_nogr = x_nonan_nofv_nE[gr_ind]
                    else:
                        y_nonan_nofv_nE_nogr = y_nonan_nofv_nE
                        x_nonan_nofv_nE_nogr = x_nonan_nofv_nE

                    if len(y_nonan_nofv) == 0:
                        continue

                    title = ' '.join((r, ms.split('-')[0]))
                    full_title = (title + '\nDeployments: ' + str(sorted(deployments)) + '\n' + t0 + ' - ' + t1)

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((r, sv))
                    else:
                        sname = '-'.join((r, m, sv))

                    # (x, y, stdev, file name): first all data, then the data with
                    # extreme values, out-of-range values and outliers removed
                    plot_specs = [
                        (x_nonan_nofv, y_nonan_nofv, None, sname),
                        (x_nonan_nofv_nE_nogr, y_nonan_nofv_nE_nogr, 5, '_'.join((sname, 'rmoutliers'))),
                    ]
                    for px, py, stdev, out_name in plot_specs:
                        fig, ax = pf.plot_timeseries_all(px, py, sv, sv_units, stdev=stdev)
                        ax.set_title(full_title, fontsize=8)
                        for etimes in end_times:
                            ax.axvline(x=etimes,  color='b', linestyle='--', linewidth=.6)
                        pf.save_fig(save_dir, out_name)
Ejemplo n.º 25
0
def main(folder, out, time_break):
    """
    Plot each science variable against time, split by the unique values of time_break.

    For every unique value of the time_break variable and every science variable, two
    figures are saved: one from pf.auto_plot (stdev=None) and one from
    pf.depth_cross_section (stdev=1, file name suffixed with 'outliers_removed').

    folder: path or pattern handed to xr.open_mfdataset
    out: Directory to save plots; figures go to <out>/<subsite>/<node>/<stream>/timeseries
    time_break: name of the dataset variable whose unique values define the subsets
        that are plotted separately
    """
    # possible pressure variables; compiled once because the pattern never changes
    pressure_vars = [
        'seawater_pressure', 'sci_water_pressure_dbar',
        'ctdgv_m_glider_instrument_recovered-sci_water_pressure_dbar',
        'ctdgv_m_glider_instrument-sci_water_pressure_dbar'
    ]
    rePressure = re.compile('|'.join(pressure_vars))

    with xr.open_mfdataset(folder, mask_and_scale=False) as ds:
        # change dimensions from 'obs' to 'time'
        ds = ds.swap_dims({'obs': 'time'})
        ds_variables = ds.data_vars.keys()  # List of dataset variables
        stream = ds.stream  # List stream name associated with the data
        title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
        save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
        platform = ds.subsite
        node = ds.node
        sensor = ds.sensor
        save_dir = os.path.join(out, ds.subsite, ds.node, ds.stream,
                                'timeseries')
        cf.create_dir(save_dir)
        try:
            # select specific streams engineering variables
            # (stream_vars is not defined in this function; expected at module level)
            eng = stream_vars[stream]
        except KeyError:
            # No engineering variables known for this stream. This must be an empty
            # list: an empty string would add an empty alternative to the regular
            # expression below, which matches every name and filters out all variables.
            eng = []

        misc = [
            'timestamp', 'provenance', 'qc', 'id', 'obs', 'deployment',
            'volts', 'counts', 'quality_flag'
        ]

        reg_ex = re.compile('|'.join(eng + misc))  # make regular expression

        #  keep variables that are not in the regular expression
        sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

        times = np.unique(ds[time_break])

        for t in times:
            time_ind = t == ds[time_break].data
            for var in sci_vars:
                x = dict(data=ds['time'].data[time_ind],
                         info=dict(label='Time', units='GMT'))
                # NOTE(review): '%00' is not a standard strftime directive and its
                # output is platform-dependent; '%H%M%S' may have been intended.
                # Left unchanged so existing file names are not altered.
                t0 = pd.to_datetime(
                    x['data'].min()).strftime('%Y-%m-%dT%H%M%00')
                t1 = pd.to_datetime(
                    x['data'].max()).strftime('%Y-%m-%dT%H%M%00')
                try:
                    sci = ds[var]
                    print(var)
                except UnicodeEncodeError:  # some comments have latex characters
                    ds[var].attrs.pop('comment')  # remove from the attributes
                    sci = ds[var]  # or else the variable won't load

                # look up the pressure variable associated with this variable
                # NOTE(review): the result is not used below (y is rebuilt from
                # sci.data); confirm that `sci.variables` is the intended accessor.
                pressure = [s for s in sci.variables if rePressure.search(s)]
                pressure = ''.join(pressure)
                y = sci.variables[pressure]

                try:
                    y_lab = sci.long_name
                except AttributeError:
                    y_lab = sci.standard_name
                y = dict(data=sci.data[time_ind],
                         info=dict(label=y_lab,
                                   units=sci.units,
                                   var=var,
                                   platform=platform,
                                   node=node,
                                   sensor=sensor))

                title = title_pre + var

                # plot timeseries with outliers
                fig, ax = pf.auto_plot(x,
                                       y,
                                       title,
                                       stdev=None,
                                       line_style='r-o',
                                       g_range=True)
                pf.resize(width=12, height=8.5)  # Resize figure

                save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor,
                                                       var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')

                # plot z variable each time
                fig, ax = pf.depth_cross_section(x,
                                                 y,
                                                 title,
                                                 stdev=1,
                                                 line_style='r-o',
                                                 g_range=True)

                pf.resize(width=12, height=8.5)  # Resize figure

                save_name = '{}-{}-{}_{}_{}-{}_outliers_removed'.format(
                    platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')
            # x and y are rebound on every iteration, so no explicit `del` is needed
            # (deleting them here failed when sci_vars was empty).
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only,
         zdbar, n_std, inpercentile, zcell_size):
    """
    Flag suspect profile data per depth bin, log the flagged timestamps, and plot the rest.

    For each reference designator found in url_list (names containing 'ENG' or 'ADCP'
    are ignored) and each of its datasets, every non-pressure science variable is
    cleaned (cf.reject_erroneous_data, a minimum-value threshold, and
    cf.reject_timestamps_dataportal), grouped into pressure bins and screened with
    cf.reject_timestamps_in_groups. Flagged timestamps are appended to
    <sDir>/<array>/<subsite>/<refdes>/time_to_exclude/
    <deployment>_<refdes>_<method>_excluded_timestamps.csv, and a profile plot and a
    cross-section plot of the remaining data are saved whenever any data were removed.

    url_list: dataset URLs; the second-to-last '/'-separated element must be
        '-'-separated with the reference designator in elements 1-4
    sDir: base directory for plots and the excluded-timestamp files
    deployment_num: int deployment number to process, or None for all deployments
    start_time, end_time: optional time limits (objects with .strftime); the
        selection is applied only when both are given
    preferred_only: 'yes' to process only the preferred streams for each deployment
    zdbar: if truthy, only data with 0 < pressure < zdbar are kept
    n_std: forwarded to cf.reject_timestamps_in_groups; None is forwarded instead for
        variables whose name contains 'scatter' (so the percentile is used for them)
    inpercentile: percentile forwarded to cf.reject_timestamps_in_groups
    zcell_size: integer step, in the pressure variable's units, of the depth bins
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                fd)

            # Skip other deployments before opening the file. Integers are compared
            # by value (`!=`); an identity test is only reliable for small ints.
            if deployment_num is not None and int(deployment[-4:]) != deployment_num:
                continue

            # the context manager closes the file on every exit path of this iteration
            with xr.open_dataset(fd, mask_and_scale=False) as ds_file:
                ds = ds_file.swap_dims({'obs': 'time'})

                array = subsite[0:2]
                sci_vars = cf.return_science_vars(stream)

                # NOTE: the bathymetry overlay from glider engineering data is not
                # used; pf.plot_xsection is called with t_eng=None and
                # m_water_depth=None below.

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'.
                            format(start_time, end_time))
                        continue
                    stime = start_time.strftime('%Y-%m-%d')
                    etime = end_time.strftime('%Y-%m-%d')
                    ext = stime + 'to' + etime
                    save_dir_profile = os.path.join(sDir, array, subsite, refdes,
                                                    'profile_plots', deployment,
                                                    ext)
                    save_dir_xsection = os.path.join(sDir, array, subsite, refdes,
                                                     'xsection_plots', deployment,
                                                     ext)
                else:
                    save_dir_profile = os.path.join(sDir, array, subsite, refdes,
                                                    'profile_plots', deployment)
                    save_dir_xsection = os.path.join(sDir, array, subsite, refdes,
                                                     'xsection_plots', deployment)

                texclude_dir = os.path.join(sDir, array, subsite, refdes,
                                            'time_to_exclude')
                cf.create_dir(texclude_dir)

                time1 = ds['time'].values
                try:
                    ds_lat1 = ds['lat'].values
                except KeyError:
                    ds_lat1 = None
                    print('No latitude variable in file')
                try:
                    ds_lon1 = ds['lon'].values
                except KeyError:
                    ds_lon1 = None
                    print('No longitude variable in file')

                # get pressure variable
                pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(
                    ds)

                # prepare file to list timestamps with suspect data for each data
                # parameter (this writes the header row; rows are appended below)
                stat_data = pd.DataFrame(
                    columns=['deployments', 'time_to_exclude'])
                file_exclude = '{}/{}_{}_{}_excluded_timestamps.csv'.format(
                    texclude_dir, deployment, refdes, method)
                stat_data.to_csv(file_exclude, index=True)

                # loop through sensor-data parameters
                for sv in sci_vars:
                    print(sv)
                    if 'pressure' in sv:
                        continue

                    z1 = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    # Check if the array is all NaNs
                    if sum(np.isnan(z1)) == len(z1):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z1[z1 != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # remove unreasonable pressure data (e.g. for surface piercing profilers)
                    if zdbar:
                        po_ind = (0 < y1) & (y1 < zdbar)
                        n_zdbar = np.sum(~po_ind)
                        tm = time1[po_ind]
                        y = y1[po_ind]
                        z = z1[po_ind]
                        # lat/lon are None when the file has no such variable
                        ds_lat = ds_lat1[po_ind] if ds_lat1 is not None else None
                        ds_lon = ds_lon1[po_ind] if ds_lon1 is not None else None
                        print('{} in water depth > {} dbar'.format(
                            n_zdbar, zdbar))
                    else:
                        tm = time1
                        y = y1
                        z = z1
                        ds_lat = ds_lat1
                        ds_lon = ds_lon1

                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                        cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                    # get rid of 0.0 data: keep values above a per-variable minimum
                    min_valid = {
                        'salinity': 30,
                        'density': 1022.5,
                        'conductivity': 3.45,
                    }.get(sv, 0)
                    ind = ndata > min_valid

                    lenzero = np.sum(~ind)
                    dtime = dtime[ind]
                    zpressure = zpressure[ind]
                    ndata = ndata[ind]
                    if ds_lat is not None and ds_lon is not None:
                        lat = lat[ind]
                        lon = lon[ind]
                    else:
                        lat = None
                        lon = None

                    if len(dtime) == 0:
                        continue

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                        cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                    print(
                        'removed {} data points using visual inspection of data'
                        .format(len(ndata) - len(z_portal)))

                    if len(y_portal) == 0:
                        continue

                    # create data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(min(y_portal) - zcell_size))
                    max_r = int(round(max(y_portal) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    groups, d_groups = gt.group_by_depth_range(
                        t_portal, y_portal, z_portal, columns, ranges)

                    # Use the percentile (n_std=None) for 'scatter' variables only.
                    # A per-variable value keeps the caller's n_std intact for the
                    # variables and files processed afterwards.
                    sv_n_std = None if 'scatter' in sv else n_std

                    #  identifying timestamps from percentile analysis
                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                        groups, d_groups, sv_n_std, inpercentile)

                    if len(time_ex) != 0:
                        # writing timestamps to .csv file to use with data_range.py script
                        t_exclude = time_ex[0]
                        for i in range(1, len(time_ex)):
                            t_exclude = '{}, {}'.format(t_exclude, time_ex[i])

                        stat_data = pd.DataFrame(
                            {
                                'deployments': deployment,
                                'time_to_exclude': t_exclude
                            },
                            index=[sv])
                        stat_data.to_csv(file_exclude,
                                         index=True,
                                         mode='a',
                                         header=False)

                        #  rejecting timestamps from percentile analysis
                        t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(
                            t_portal, y_portal, z_portal, time_ex)
                    else:
                        t_nospct = t_portal
                        z_nospct = z_portal
                        y_nospct = y_portal

                    # Plot only when data remain and something was removed
                    if len(t_nospct) == 0 or len(t_nospct) == len(dtime):
                        continue

                    cf.create_dir(save_dir_profile)
                    cf.create_dir(save_dir_xsection)
                    sname = '-'.join((r, method, sv))
                    sfile = '_'.join(
                        ('rm_suspect_data', sname,
                         pd.to_datetime(t_nospct.min()).strftime('%Y%m%d')))

                    t0 = pd.to_datetime(
                        t_nospct.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(
                        t_nospct.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title = ' '.join(
                        (deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                    # Legend text: one line per filter that was applied. The
                    # statistical line reports the method actually used for this
                    # variable (SD when sv_n_std is set, percentile otherwise).
                    n_stat_removed = len(z_portal) - len(z_nospct)
                    legend = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges '
                        '[{} - {}], {} unreasonable values'.format(
                            lenfv, lennan, lenev, lengr, global_min,
                            global_max, lenzero))
                    if sv_n_std:
                        legend += (
                            '\nremoved {} data points +/- {} SD of data grouped in {} dbar segments'
                            .format(n_stat_removed, sv_n_std, zcell_size))
                    else:
                        legend += (
                            '\nremoved {} in the upper and lower {} percentile of data grouped in {} '
                            'dbar segments'.format(n_stat_removed, inpercentile,
                                                   zcell_size))
                    legend += (
                        '\nexcluded {} suspect data points when inspected visually'
                        .format(len(ndata) - len(z_portal)))
                    if zdbar:
                        legend += (
                            '\nexcluded {} suspect data in water depth greater than {} dbar'
                            .format(n_zdbar, zdbar))
                    leg_text = (legend, )

                    var_label = sv + " (" + sv_units + ")"
                    press_label = press[0] + " (" + y_units[0] + ")"

                    # profile plot of the non-erroneous data
                    print('plotting profile')
                    fig, ax = pf.plot_profiles(z_nospct,
                                               y_nospct,
                                               t_nospct,
                                               press_label,
                                               var_label,
                                               'Time',
                                               stdev=None)

                    ax.set_title(title, fontsize=9)
                    ax.plot(n_avg, y_avg, '-k')
                    ax.legend(leg_text,
                              loc='upper center',
                              bbox_to_anchor=(0.5, -0.17),
                              fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_profile, sfile)

                    # xsection plot of the non-erroneous data
                    print('plotting xsection')
                    fig, ax, bar = pf.plot_xsection(
                        subsite,
                        t_nospct,
                        y_nospct,
                        z_nospct,
                        var_label,
                        press_label,
                        t_eng=None,
                        m_water_depth=None,
                        inpercentile=inpercentile,
                        stdev=None)

                    ax.set_title(title, fontsize=9)
                    ax.legend(leg_text,
                              loc='upper center',
                              bbox_to_anchor=(0.5, -0.17),
                              fontsize=6)
                    fig.tight_layout()
                    pf.save_fig(save_dir_xsection, sfile)
def main(files, out, east_var, north_var, up_var, err_var):
    """
    Create and save pcolor plots of ADCP velocity data, two figures per dataset.

    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    east_var, north_var, up_var, err_var: names of the dataset variables to plot.
        The north/east variables are drawn in one figure (file name suffix
        'E-W-ADCP') and the up/error variables in a second one ('U-R-ADCP').
    """
    _, ext = os.path.splitext(files)
    # Compare the extension exactly. The previous substring test (ext in '.nc')
    # was also true for an empty extension, so an extension-less text file of
    # links was handed straight to xarray instead of being read as a list.
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    # NOTE(review): stream_vars is not used below; the call is kept unchanged.
    stream_vars = pf.load_variable_dict(var='eng')  # load engineering variables

    # the engine that xarray uses can be changed as specified here
    # http://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html#xarray.open_dataset
    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds_disk:
            # change dimensions from 'obs' to 'time'
            ds_disk = ds_disk.swap_dims({'obs': 'time'})
            # NOTE(review): a fixed 'D0000' prefix yields six characters after
            # 'D' for deployment numbers >= 10 - confirm the intended padding.
            deployment = 'D0000{}'.format(str(numpy.unique(ds_disk.deployment)[0]))
            title_pre = mk_str(ds_disk.attrs, 't')
            save_pre = mk_str(ds_disk.attrs, 's')
            save_dir = os.path.join(out, ds_disk.subsite, deployment, ds_disk.node, ds_disk.stream, 'pcolor')
            cf.create_dir(save_dir)

            north = ds_disk[north_var]
            east = ds_disk[east_var]
            up = ds_disk[up_var]
            error = ds_disk[err_var]

            try:
                bins = ds_disk['bin_depths']
                bins = dict(data=bins.data.T, info=dict(label=bins.long_name, units=bins.units))
            except KeyError:
                # No bin depths in the file: use the matrix indices to plot.
                # Every row holds 0..n-1 along the last axis, and the grid is
                # transposed like the velocity arrays below. (A reshape to the
                # swapped shape does not transpose; it scrambles the indices.)
                bin_idx = numpy.zeros_like(east.data)
                bin_idx[...] = numpy.arange(bin_idx.shape[-1])
                # Same data/info layout as every other dict passed to pf.adcp.
                bins = dict(data=bin_idx.T, info=dict(label='bin_indices', units=''))

                # the correct way to do this is to calculate the bin_depths, for that you need:
                # 9 First Cell Range(meters) (rounded bin_1_distance average, m)
                # 73 deployment depth of the ADCP instrument (pull from asset-management, depth in m)
                # 21 number of bins (num_cells, m)
                # 4  cell length (cell_length, m)
                # equation with the numbers above would be:
                # depths = 73 - 9 - ([1:21]-1)*4;

            time = dict(data=ds_disk['time'].data, info=dict(label=ds_disk['time'].standard_name, units='GMT'))
            north = dict(data=north.data.T, info=dict(label=north.long_name, units=north.units))
            east = dict(data=east.data.T, info=dict(label=east.long_name, units=east.units))
            up = dict(data=up.data.T, info=dict(label=up.long_name, units=up.units))
            error = dict(data=error.data.T, info=dict(label=error.long_name, units=error.units))

            sname_ew = save_pre + 'E-W-ADCP'
            title = title_pre
            fig, axs = pf.adcp(time, bins, north, east, title)
            pf.resize(width=12, height=8.5)  # Resize figure
            pf.save_fig(save_dir, sname_ew, res=250)  # Save figure

            sname_ur = save_pre + 'U-R-ADCP'
            fig, axs = pf.adcp(time, bins, up, error, title)
            pf.resize(width=12, height=8.5)  # Resize figure
            pf.save_fig(save_dir, sname_ur, res=250)  # Save figure

            # release the figures before moving on to the next file
            plt.close('all')
# Example no. 28
# 0
def _load_times_to_exclude(exclude_dir, var_name):
    """Return the unique timestamps listed for var_name in the csv files of exclude_dir.

    Each csv is expected to carry the variable name in its 'Unnamed: 0' column
    and a ', '-separated string of timestamps in 'time_to_exclude'. A missing
    or empty directory, or files without those columns, yield an empty array.
    """
    if not os.path.isdir(exclude_dir):
        print('no time_to_exclude directory: {}'.format(exclude_dir))
        return np.unique([])

    csv_files = [
        os.path.join(exclude_dir, item) for item in os.listdir(exclude_dir)
        if not item.startswith('.')
        and os.path.isfile(os.path.join(exclude_dir, item))
    ]
    if not csv_files:
        return np.unique([])

    dre = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)
    if 'Unnamed: 0' not in dre.columns or 'time_to_exclude' not in dre.columns:
        return np.unique([])

    drn = dre.loc[dre['Unnamed: 0'] == var_name]
    list_time = []
    for itime in drn['time_to_exclude']:
        # empty cells are read as NaN (float) and carry no timestamps
        if isinstance(itime, str):
            list_time.extend(itime.split(', '))
    return np.unique(list_time)


def main(url_list, sDir, mDir, zcell_size, zdbar, start_time, end_time,
         inpercentile=None):
    """
    Write a data-range table and a cross-section figure for every science
    variable of every instrument found in url_list.

    url_list : paths to instrument data by methods
    sDir : path to the directory on your machine to save files; the
        'time_to_exclude' csv files are also read from below this directory
    mDir : base directory for the <refdes>_data_ranges.csv summary files
    zcell_size : size (dbar) of the pressure segments used to group the data
    zdbar : keep only data with pressure < zdbar; None keeps everything
    start_time, end_time : passed on to cd.append_evaluated_science_data
    inpercentile : only used in the printed messages and the figure legend.
        The original code referenced this name without defining it; it is now
        an optional argument so the function no longer depends on a global.
    """
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    # separate different instruments
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # get science variable long names from the Data Review Database
        stream_sci_vars = cd.sci_var_long_names(r)

        # check if the science variable long names are the same for each stream and initialize empty arrays
        sci_vars_dict0 = cd.sci_var_long_names_check(stream_sci_vars)

        # get the list of data files and filter out collocated instruments and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # select the list of data files from the preferred dataset for each deployment
        fdatasets_final = []
        for deploy_name, stream_name in zip(ps_df['deployment'], ps_df[0]):
            for x in fdatasets:
                if deploy_name in x and stream_name in x:
                    fdatasets_final.append(x)

        # build dictionary of science data from the preferred dataset for each deployment
        print('\nAppending data from files')
        et = []
        sci_vars_dict, y_unit, y_name = cd.append_evaluated_science_data(
            sDir, ps_df, n_streams, r, fdatasets_final, sci_vars_dict0, et,
            start_time, end_time)

        # get end times of deployments
        deployments = []
        end_times = []
        for _, row in ps_df.iterrows():
            deploy_num = int(row['deployment'][-4:])
            deploy_info = cf.get_deployment_information(dr_data, deploy_num)
            deployments.append(deploy_num)
            end_times.append(pd.to_datetime(deploy_info['stop_date']))
        # comma-separated deployment numbers written to the stats table
        deploy_label = ', '.join(str(d) for d in deployments)

        # create a data-ranges table and figure for full data time range
        # create a folder to save data ranges
        save_dir_stat = os.path.join(mDir, array, subsite)
        cf.create_dir(save_dir_stat)

        save_fdir = os.path.join(sDir, array, subsite, r, 'data_range')
        cf.create_dir(save_fdir)
        stat_frames = []

        for m, n in sci_vars_dict.items():
            for sv, vinfo in n['vars'].items():
                var_name = vinfo['var_name']
                print(var_name)
                if len(vinfo['t']) < 1:
                    print('no variable data to plot')
                    continue

                sv_units = vinfo['units'][0]
                fv = vinfo['fv'][0]
                t = vinfo['t']
                z = vinfo['values']
                y = vinfo['pressure']

                # Check if the array is all NaNs
                if np.all(np.isnan(z)):
                    print('Array of all NaNs - skipping plot.')
                    continue
                # Check if the array is all fill values
                if not np.any(z != fv):
                    print('Array of all fill values - skipping plot.')
                    continue

                # clean up data
                # reject erroneous data
                dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                    cf.reject_erroneous_data(r, sv, t, y, z, fv)

                # reject timestamps from stat analysis
                Dpath = '{}/{}/{}/{}/{}'.format(sDir, array, subsite, r,
                                                'time_to_exclude')
                u_time_list = _load_times_to_exclude(Dpath, var_name)
                if len(u_time_list) != 0:
                    t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(
                        dtime, zpressure, ndata, u_time_list)
                else:
                    # Nothing flagged for this variable: carry the cleaned data
                    # forward. Without this the names were undefined (or stale
                    # from the previous variable).
                    t_nospct, z_nospct, y_nospct = dtime, ndata, zpressure

                print(
                    '{} using {} percentile of data grouped in {} dbar segments'
                    .format(
                        len(zpressure) - len(z_nospct), inpercentile,
                        zcell_size))

                # reject time range from data portal file export
                t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(
                    subsite, r, t_nospct, y_nospct, z_nospct)

                print('{} using visual inspection of data'.format(
                    len(z_nospct) - len(z_portal)))

                # reject data in a depth range
                if zdbar is not None:
                    y_ind = y_portal < zdbar
                    t_array = t_portal[y_ind]
                    y_array = y_portal[y_ind]
                    z_array = z_portal[y_ind]
                else:
                    t_array = t_portal
                    y_array = y_portal
                    z_array = z_portal
                # report how many points were dropped, not the mask length
                print('{} in water depth > {} dbar'.format(
                    len(y_portal) - len(y_array), zdbar))

                if len(y_array) == 0:
                    continue

                if m == 'common_stream_placeholder':
                    sname = '-'.join((var_name, r))
                else:
                    sname = '-'.join((var_name, r, m))

                # create data ranges for non - pressure data only
                if 'pressure' not in var_name:
                    columns = ['tsec', 'dbar', str(var_name)]
                    # create depth ranges
                    min_r = int(round(min(y_array) - zcell_size))
                    max_r = int(round(max(y_array) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    # group data by depth
                    groups, _ = gt.group_by_depth_range(
                        t_array, y_array, z_array, columns, ranges)

                    print('writing data ranges for {}'.format(var_name))
                    stat_data = groups.describe()[var_name]
                    stat_data.insert(loc=0,
                                     column='parameter',
                                     value=sv,
                                     allow_duplicates=False)
                    stat_data.insert(loc=1,
                                     column='deployments',
                                     value=deploy_label,
                                     allow_duplicates=False)
                    # Collected only here: appending outside this branch
                    # re-added the previous variable's table for pressure
                    # variables (or failed when none existed yet).
                    stat_frames.append(stat_data)

                # plot full time range free from errors and suspect data
                clabel = sv + " (" + sv_units + ")"
                ylabel = (y_name[0][0] + " (" + y_unit[0][0] + ")")
                title = ' '.join((r, m))

                # plot non-erroneous -suspect data
                fig, ax, bar = pf.plot_xsection(subsite,
                                                t_array,
                                                y_array,
                                                z_array,
                                                clabel,
                                                ylabel,
                                                inpercentile=None,
                                                stdev=None)

                ax.set_title(title, fontsize=9)
                leg_text = (
                    'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}]'
                    .format(
                        len(z) - lenfv,
                        len(z) - lennan,
                        len(z) - lenev, lengr, global_min,
                        global_max) + '\n' +
                    ('removed {} in the upper and lower {} percentile of data grouped in {} dbar segments'
                     .format(
                         len(zpressure) - len(z_nospct), inpercentile,
                         zcell_size)), )

                ax.legend(leg_text,
                          loc='upper center',
                          bbox_to_anchor=(0.5, -0.17),
                          fontsize=6)

                # mark the end of every deployment on the time axis
                for end_time_i, deploy_i in zip(end_times, deployments):
                    ax.axvline(x=end_time_i,
                               color='b',
                               linestyle='--',
                               linewidth=.8)
                    ax.text(end_time_i,
                            min(y_array) - 5,
                            'End' + str(deploy_i),
                            fontsize=6,
                            style='italic',
                            bbox=dict(
                                boxstyle='round',
                                ec=(0., 0.5, 0.5),
                                fc=(1., 1., 1.),
                            ))

                fig.tight_layout()
                sfile = '_'.join(('data_range', sname))
                pf.save_fig(save_fdir, sfile)

            # write stat file (as in the original, refreshed after every stream)
            if stat_frames:
                stat_df = pd.concat(stat_frames, ignore_index=True)
            else:
                stat_df = pd.DataFrame()
            stat_df.to_csv('{}/{}_data_ranges.csv'.format(save_dir_stat, r),
                           index=True,
                           float_format='%11.6f')
def main(sDir, plotting_sDir, url_list, sd_calc):
    dr = pd.read_csv('https://datareview.marine.rutgers.edu/notes/export')
    drn = dr.loc[dr.type == 'exclusion']
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)
        pms = []
        for index, row in ps_df.iterrows():
            for ii in range(n_streams):
                try:
                    rms = '-'.join((r, row[ii]))
                    pms.append(row[ii])
                except TypeError:
                    continue
                for dd in datasets:
                    spl = dd.split('/')[-2].split('-')
                    catalog_rms = '-'.join(
                        (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                    fdeploy = dd.split('/')[-1].split('_')[0]
                    if rms == catalog_rms and fdeploy == row['deployment']:
                        fdatasets.append(dd)

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        # find time ranges to exclude from analysis for data review database
        subsite = r.split('-')[0]
        subsite_node = '-'.join((subsite, r.split('-')[1]))

        drne = drn.loc[drn.reference_designator.isin(
            [subsite, subsite_node, r])]
        et = []
        for i, row in drne.iterrows():
            sdate = cf.format_dates(row.start_date)
            edate = cf.format_dates(row.end_date)
            et.append([sdate, edate])

        # get science variable long names from the Data Review Database
        stream_sci_vars = cd.sci_var_long_names(r)

        # check if the science variable long names are the same for each stream
        sci_vars_dict = cd.sci_var_long_names_check(stream_sci_vars)

        # get the preferred stream information
        ps_df, n_streams = cf.get_preferred_stream_info(r)

        # build dictionary of science data from the preferred dataset for each deployment
        print('\nAppending data from files')
        sci_vars_dict, pressure_unit, pressure_name = cd.append_science_data(
            ps_df, n_streams, r, fdatasets_sel, sci_vars_dict, et)

        # analyze combined dataset
        print('\nAnalyzing combined dataset and writing summary file')

        array = subsite[0:2]
        save_dir = os.path.join(sDir, array, subsite)
        cf.create_dir(save_dir)

        rows = []
        if ('FLM' in r) and (
                'CTDMO' in r
        ):  # calculate Flanking Mooring CTDMO stats based on pressure
            headers = [
                'common_stream_name', 'preferred_methods_streams',
                'deployments', 'long_name', 'units', 't0', 't1', 'fill_value',
                'global_ranges', 'n_all', 'press_min_max',
                'n_excluded_forpress', 'n_nans', 'n_fillvalues', 'n_grange',
                'define_stdev', 'n_outliers', 'n_stats', 'mean', 'min', 'max',
                'stdev', 'note'
            ]
        else:
            headers = [
                'common_stream_name', 'preferred_methods_streams',
                'deployments', 'long_name', 'units', 't0', 't1', 'fill_value',
                'global_ranges', 'n_all', 'n_nans', 'n_fillvalues', 'n_grange',
                'define_stdev', 'n_outliers', 'n_stats', 'mean', 'min', 'max',
                'stdev'
            ]

        for m, n in sci_vars_dict.items():
            print('\nSTREAM: ', m)
            if m == 'common_stream_placeholder':
                m = 'science_data_stream'
            if m == 'metbk_hourly':  # don't calculate ranges for metbk_hourly
                continue

            if ('FLM' in r) and (
                    'CTDMO' in r
            ):  # calculate Flanking Mooring CTDMO stats based on pressure
                # index the pressure variable to filter and calculate stats on the rest of the variables
                sv_press = 'Seawater Pressure'
                vinfo_press = n['vars'][sv_press]

                # first, index where data are nans, fill values, and outside of global ranges
                fv_press = list(np.unique(vinfo_press['fv']))[0]
                pdata = vinfo_press['values']

                [pind, __, __, __, __,
                 __] = index_dataset(r, vinfo_press['var_name'], pdata,
                                     fv_press)

                pdata_filtered = pdata[pind]
                [__, pmean, __, __, psd,
                 __] = cf.variable_statistics(pdata_filtered, None)

                # index of pressure = average of all 'valid' pressure data +/- 1 SD
                ipress_min = pmean - psd
                ipress_max = pmean + psd
                ind_press = (pdata >= ipress_min) & (pdata <= ipress_max)

                # calculate stats for all variables
                print('\nPARAMETERS:')
                for sv, vinfo in n['vars'].items():
                    print(sv)
                    fv_lst = np.unique(vinfo['fv']).tolist()
                    if len(fv_lst) == 1:
                        fill_value = fv_lst[0]
                    else:
                        print('No unique fill value for {}'.format(sv))

                    lunits = np.unique(vinfo['units']).tolist()
                    n_all = len(vinfo['t'])

                    # filter data based on pressure index
                    t_filtered = vinfo['t'][ind_press]
                    data_filtered = vinfo['values'][ind_press]
                    deploy_filtered = vinfo['deployments'][ind_press]

                    n_excluded = n_all - len(t_filtered)

                    [dataind, g_min, g_max, n_nan, n_fv,
                     n_grange] = index_dataset(r, vinfo['var_name'],
                                               data_filtered, fill_value)

                    t_final = t_filtered[dataind]
                    data_final = data_filtered[dataind]
                    deploy_final = deploy_filtered[dataind]

                    t0 = pd.to_datetime(
                        min(t_final)).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(
                        max(t_final)).strftime('%Y-%m-%dT%H:%M:%S')
                    deploy = list(np.unique(deploy_final))
                    deployments = [int(dd) for dd in deploy]

                    if len(data_final) > 1:
                        [num_outliers, mean, vmin, vmax, sd, n_stats
                         ] = cf.variable_statistics(data_final, sd_calc)
                    else:
                        mean = None
                        vmin = None
                        vmax = None
                        sd = None
                        n_stats = None

                    note = 'restricted stats calculation to data points where pressure is within defined ranges' \
                           ' (average of all pressure data +/- 1 SD)'
                    rows.append([
                        m,
                        list(np.unique(pms)), deployments, sv, lunits, t0, t1,
                        fv_lst, [g_min, g_max], n_all,
                        [round(ipress_min, 2),
                         round(ipress_max,
                               2)], n_excluded, n_nan, n_fv, n_grange, sd_calc,
                        num_outliers, n_stats, mean, vmin, vmax, sd, note
                    ])

                    # plot CTDMO data used for stats
                    psave_dir = os.path.join(plotting_sDir, array, subsite, r,
                                             'timeseries_plots_stats')
                    cf.create_dir(psave_dir)

                    dr_data = cf.refdes_datareview_json(r)
                    deployments = []
                    end_times = []
                    for index, row in ps_df.iterrows():
                        deploy = row['deployment']
                        deploy_info = cf.get_deployment_information(
                            dr_data, int(deploy[-4:]))
                        deployments.append(int(deploy[-4:]))
                        end_times.append(
                            pd.to_datetime(deploy_info['stop_date']))

                    sname = '-'.join((r, sv))
                    fig, ax = pf.plot_timeseries_all(t_final,
                                                     data_final,
                                                     sv,
                                                     lunits[0],
                                                     stdev=None)
                    ax.set_title(
                        (r + '\nDeployments: ' + str(sorted(deployments)) +
                         '\n' + t0 + ' - ' + t1),
                        fontsize=8)
                    for etimes in end_times:
                        ax.axvline(x=etimes,
                                   color='k',
                                   linestyle='--',
                                   linewidth=.6)
                    pf.save_fig(psave_dir, sname)

                    if sd_calc:
                        sname = '-'.join((r, sv, 'rmoutliers'))
                        fig, ax = pf.plot_timeseries_all(t_final,
                                                         data_final,
                                                         sv,
                                                         lunits[0],
                                                         stdev=sd_calc)
                        ax.set_title(
                            (r + '\nDeployments: ' + str(sorted(deployments)) +
                             '\n' + t0 + ' - ' + t1),
                            fontsize=8)
                        for etimes in end_times:
                            ax.axvline(x=etimes,
                                       color='k',
                                       linestyle='--',
                                       linewidth=.6)
                        pf.save_fig(psave_dir, sname)

            else:
                if not sd_calc:
                    sdcalc = None

                print('\nPARAMETERS: ')
                for sv, vinfo in n['vars'].items():
                    print(sv)

                    fv_lst = np.unique(vinfo['fv']).tolist()
                    if len(fv_lst) == 1:
                        fill_value = fv_lst[0]
                    else:
                        print(fv_lst)
                        print('No unique fill value for {}'.format(sv))

                    lunits = np.unique(vinfo['units']).tolist()

                    t = vinfo['t']
                    if len(t) > 1:
                        data = vinfo['values']
                        n_all = len(t)

                        if 'SPKIR' in r or 'presf_abc_wave_burst' in m:
                            if 'SPKIR' in r:
                                [dd_data, g_min, g_max, n_nan, n_fv,
                                 n_grange] = index_dataset_2d(
                                     r, 'spkir_abj_cspp_downwelling_vector',
                                     data, fill_value)
                            else:
                                [dd_data, g_min, g_max, n_nan, n_fv,
                                 n_grange] = index_dataset_2d(
                                     r, 'presf_wave_burst_pressure', data,
                                     fill_value)
                            t_final = t
                            t0 = pd.to_datetime(
                                min(t_final)).strftime('%Y-%m-%dT%H:%M:%S')
                            t1 = pd.to_datetime(
                                max(t_final)).strftime('%Y-%m-%dT%H:%M:%S')
                            deploy_final = vinfo['deployments']
                            deploy = list(np.unique(deploy_final))
                            deployments = [int(dd) for dd in deploy]

                            num_outliers = []
                            mean = []
                            vmin = []
                            vmax = []
                            sd = []
                            n_stats = []
                            for i in range(len(dd_data)):
                                dd = data[i]
                                # drop nans before calculating stats
                                dd = dd[~np.isnan(dd)]
                                [
                                    num_outliersi, meani, vmini, vmaxi, sdi,
                                    n_statsi
                                ] = cf.variable_statistics(dd, sd_calc)
                                num_outliers.append(num_outliersi)
                                mean.append(meani)
                                vmin.append(vmini)
                                vmax.append(vmaxi)
                                sd.append(sdi)
                                n_stats.append(n_statsi)

                        else:
                            [dataind, g_min, g_max, n_nan, n_fv,
                             n_grange] = index_dataset(r, vinfo['var_name'],
                                                       data, fill_value)
                            t_final = t[dataind]
                            if len(t_final) > 0:
                                t0 = pd.to_datetime(
                                    min(t_final)).strftime('%Y-%m-%dT%H:%M:%S')
                                t1 = pd.to_datetime(
                                    max(t_final)).strftime('%Y-%m-%dT%H:%M:%S')
                                data_final = data[dataind]
                                # if sv == 'Dissolved Oxygen Concentration':
                                #     xx = (data_final > 0) & (data_final < 400)
                                #     data_final = data_final[xx]
                                #     t_final = t_final[xx]
                                # if sv == 'Seawater Conductivity':
                                #     xx = (data_final > 1) & (data_final < 400)
                                #     data_final = data_final[xx]
                                #     t_final = t_final[xx]
                                deploy_final = vinfo['deployments'][dataind]
                                deploy = list(np.unique(deploy_final))
                                deployments = [int(dd) for dd in deploy]

                                if len(data_final) > 1:
                                    [
                                        num_outliers, mean, vmin, vmax, sd,
                                        n_stats
                                    ] = cf.variable_statistics(
                                        data_final, sd_calc)
                                else:
                                    sdcalc = None
                                    num_outliers = None
                                    mean = None
                                    vmin = None
                                    vmax = None
                                    sd = None
                                    n_stats = None
                            else:
                                sdcalc = None
                                num_outliers = None
                                mean = None
                                vmin = None
                                vmax = None
                                sd = None
                                n_stats = None
                                deployments = None
                                t0 = None
                                t1 = None
                    else:
                        sdcalc = None
                        num_outliers = None
                        mean = None
                        vmin = None
                        vmax = None
                        sd = None
                        n_stats = None
                        deployments = None
                        t0 = None
                        t1 = None
                        t_final = []

                    if sd_calc:
                        print_sd = sd_calc
                    else:
                        print_sd = sdcalc

                    rows.append([
                        m,
                        list(np.unique(pms)), deployments, sv, lunits, t0, t1,
                        fv_lst, [g_min, g_max], n_all, n_nan, n_fv, n_grange,
                        print_sd, num_outliers, n_stats, mean, vmin, vmax, sd
                    ])

                    if len(t_final) > 0:
                        # plot data used for stats
                        psave_dir = os.path.join(
                            plotting_sDir, array, subsite, r,
                            'timeseries_reviewed_datarange')
                        cf.create_dir(psave_dir)

                        dr_data = cf.refdes_datareview_json(r)
                        deployments = []
                        end_times = []
                        for index, row in ps_df.iterrows():
                            deploy = row['deployment']
                            deploy_info = cf.get_deployment_information(
                                dr_data, int(deploy[-4:]))
                            deployments.append(int(deploy[-4:]))
                            end_times.append(
                                pd.to_datetime(deploy_info['stop_date']))

                        sname = '-'.join((r, sv))

                        # plot hourly averages for streaming data
                        if 'streamed' in sci_vars_dict[list(
                                sci_vars_dict.keys())[0]]['ms'][0]:
                            sname = '-'.join((sname, 'hourlyavg'))
                            df = pd.DataFrame({
                                'dfx': t_final,
                                'dfy': data_final
                            })
                            dfr = df.resample('H', on='dfx').mean()

                            # Plot all data
                            fig, ax = pf.plot_timeseries_all(dfr.index,
                                                             dfr['dfy'],
                                                             sv,
                                                             lunits[0],
                                                             stdev=None)
                            ax.set_title((r + '\nDeployments: ' +
                                          str(sorted(deployments)) + '\n' +
                                          t0 + ' - ' + t1),
                                         fontsize=8)
                            for etimes in end_times:
                                ax.axvline(x=etimes,
                                           color='k',
                                           linestyle='--',
                                           linewidth=.6)
                            pf.save_fig(psave_dir, sname)

                            if sd_calc:
                                sname = '-'.join(
                                    (sname, 'hourlyavg_rmoutliers'))
                                fig, ax = pf.plot_timeseries_all(dfr.index,
                                                                 dfr['dfy'],
                                                                 sv,
                                                                 lunits[0],
                                                                 stdev=sd_calc)
                                ax.set_title((r + '\nDeployments: ' +
                                              str(sorted(deployments)) + '\n' +
                                              t0 + ' - ' + t1),
                                             fontsize=8)
                                for etimes in end_times:
                                    ax.axvline(x=etimes,
                                               color='k',
                                               linestyle='--',
                                               linewidth=.6)
                                pf.save_fig(psave_dir, sname)

                        elif 'SPKIR' in r:
                            fig, ax = pf.plot_spkir(t_final, dd_data, sv,
                                                    lunits[0])
                            ax.set_title((r + '\nDeployments: ' +
                                          str(sorted(deployments)) + '\n' +
                                          t0 + ' - ' + t1),
                                         fontsize=8)
                            for etimes in end_times:
                                ax.axvline(x=etimes,
                                           color='k',
                                           linestyle='--',
                                           linewidth=.6)
                            pf.save_fig(psave_dir, sname)

                            # plot each wavelength
                            wavelengths = [
                                '412nm', '443nm', '490nm', '510nm', '555nm',
                                '620nm', '683nm'
                            ]
                            for wvi in range(len(dd_data)):
                                fig, ax = pf.plot_spkir_wv(
                                    t_final, dd_data[wvi], sv, lunits[0], wvi)
                                ax.set_title((r + '\nDeployments: ' +
                                              str(sorted(deployments)) + '\n' +
                                              t0 + ' - ' + t1),
                                             fontsize=8)
                                for etimes in end_times:
                                    ax.axvline(x=etimes,
                                               color='k',
                                               linestyle='--',
                                               linewidth=.6)
                                snamewvi = '-'.join((sname, wavelengths[wvi]))
                                pf.save_fig(psave_dir, snamewvi)
                        elif 'presf_abc_wave_burst' in m:
                            fig, ax = pf.plot_presf_2d(t_final, dd_data, sv,
                                                       lunits[0])
                            ax.set_title((r + '\nDeployments: ' +
                                          str(sorted(deployments)) + '\n' +
                                          t0 + ' - ' + t1),
                                         fontsize=8)
                            for etimes in end_times:
                                ax.axvline(x=etimes,
                                           color='k',
                                           linestyle='--',
                                           linewidth=.6)
                            snamewave = '-'.join((sname, m))
                            pf.save_fig(psave_dir, snamewave)

                        else:  # plot all data if not streamed
                            fig, ax = pf.plot_timeseries_all(t_final,
                                                             data_final,
                                                             sv,
                                                             lunits[0],
                                                             stdev=None)
                            ax.set_title((r + '\nDeployments: ' +
                                          str(sorted(deployments)) + '\n' +
                                          t0 + ' - ' + t1),
                                         fontsize=8)
                            for etimes in end_times:
                                ax.axvline(x=etimes,
                                           color='k',
                                           linestyle='--',
                                           linewidth=.6)
                            pf.save_fig(psave_dir, sname)

                            if sd_calc:
                                sname = '-'.join((r, sv, 'rmoutliers'))
                                fig, ax = pf.plot_timeseries_all(t_final,
                                                                 data_final,
                                                                 sv,
                                                                 lunits[0],
                                                                 stdev=sd_calc)
                                ax.set_title((r + '\nDeployments: ' +
                                              str(sorted(deployments)) + '\n' +
                                              t0 + ' - ' + t1),
                                             fontsize=8)
                                for etimes in end_times:
                                    ax.axvline(x=etimes,
                                               color='k',
                                               linestyle='--',
                                               linewidth=.6)
                                pf.save_fig(psave_dir, sname)

        fsum = pd.DataFrame(rows, columns=headers)
        fsum.to_csv('{}/{}_data_ranges.csv'.format(save_dir, r), index=False)
Ejemplo n.º 30
0
def main(url_list, sDir, stime, etime):
    """Overlay per-variable profile plots from two instruments.

    For every deployment found for the first URL, each science variable
    (other than pressure) from both datasets is scattered against pressure
    on a single set of axes and saved under
    ``<sDir>/<array>/<subsite>/<inst>/profile_plots/<deployment>``.

    :param url_list: exactly two data URLs; any other length prints a
        message and returns without plotting.
    :param sDir: root directory for the saved plots.
    :param stime: start of the time window handed to ``ds.sel`` (or None).
    :param etime: end of the time window handed to ``ds.sel`` (or None).
        The window is applied only when both ``stime`` and ``etime`` are set.
    """
    if len(url_list) != 2:
        print('Please provide 2 reference designators for plotting')
        return

    uu0, uu1 = url_list
    # The reference designator is a fixed-position slice of the directory
    # name that precedes the file name in the URL.
    rd0 = uu0.split('/')[-2][20:47]
    rd1 = uu1.split('/')[-2][20:47]
    array = rd0[0:2]
    inst = rd0.split('-')[-1]

    datasets0 = list(cf.get_nc_urls([uu0]))
    datasets1 = list(cf.get_nc_urls([uu1]))

    fdatasets0_sel = cf.filter_collocated_instruments(inst, datasets0)
    fdatasets1_sel = cf.filter_collocated_instruments(
        rd1.split('-')[-1], datasets1)

    # Lower bound a value must exceed to be plotted; anything not listed
    # here only has to be greater than zero.
    min_valid = {
        'salinity': 1,
        'density': 1000,
        'conductivity': 0.1,
        'dissolved_oxygen': 160,
        'estimated_oxygen_concentration': 200,
    }

    deployments = [
        dd.split('/')[-1].split('_')[0] for dd in fdatasets0_sel
    ]

    for d in deployments:
        fd0 = [x for x in fdatasets0_sel if d in x]
        fd1 = [x for x in fdatasets1_sel if d in x]
        if not fd1:
            # Deployments are enumerated from the first instrument only, so
            # the second one may have no file for this deployment.
            print('No {} dataset found for {} - skipping.'.format(rd1, d))
            continue

        # Context managers make sure both files are closed even when a
        # deployment is skipped or plotting raises.
        with xr.open_dataset(fd0[0], mask_and_scale=False) as nc0, \
                xr.open_dataset(fd1[0], mask_and_scale=False) as nc1:
            ds0 = nc0.swap_dims({'obs': 'time'})
            ds1 = nc1.swap_dims({'obs': 'time'})

            if stime is not None and etime is not None:
                ds0 = ds0.sel(time=slice(stime, etime))
                ds1 = ds1.sel(time=slice(stime, etime))
                if len(ds0['time'].values) == 0:
                    print(
                        'No data to plot for specified time range: ({} to {})'.
                        format(stime, etime))
                    continue

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                fd0[0])
            sci_vars = cf.return_science_vars(stream)

            save_dir_profile = os.path.join(sDir, array, subsite, inst,
                                            'profile_plots', deployment)
            cf.create_dir(save_dir_profile)

            # get pressure variable (name and labels are taken from the
            # first dataset and reused for the second)
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(
                ds0)

            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' in sv:
                    continue

                fig, ax = plt.subplots()
                plt.margins(y=.08, x=.02)
                plt.grid()
                title = ' '.join((deployment, subsite, inst, method))
                sname = '-'.join((subsite, inst, method, sv))

                for ds in (ds0, ds1):
                    t = ds['time'].values
                    zpressure = ds[pvarname].values
                    z1 = ds[sv].values
                    fv = ds[sv]._FillValue
                    sv_units = ds[sv].units

                    # Check if the array is all NaNs
                    if sum(np.isnan(z1)) == len(z1):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if len(z1[z1 != fv]) == 0:
                        print('Array of all fill values - skipping plot.')
                        continue

                    # get rid of 0.0 (and other implausibly low) data
                    ind = z1 > min_valid.get(sv, 0)
                    dtime = t[ind]
                    zpressure = zpressure[ind]
                    zdata = z1[ind]

                    if len(dtime) > 0:
                        ax.scatter(zdata,
                                   zpressure,
                                   s=2,
                                   edgecolor='None')

                xlabel = sv + " (" + sv_units + ")"
                ylabel = press[0] + " (" + y_units[0] + ")"

                ax.invert_yaxis()
                ax.set_xlabel(xlabel, fontsize=9)
                ax.set_ylabel(ylabel, fontsize=9)
                # NOTE(review): the legend text is hard-coded to WFP02/WFP03
                # regardless of which two instruments were supplied.
                ax.set_title(title + '\nWFP02 (blue) & WFP03 (orange)',
                             fontsize=9)
                fig.tight_layout()
                pf.save_fig(save_dir_profile, sname)
def main(sDir, url_list, start_time, end_time, preferred_only):
    """Save a multi-panel timeseries figure for each selected dataset.

    URLs are grouped by reference designator (elements 1-4 of the
    dash-separated directory name that precedes the file name). For every
    dataset that survives filtering, and whose stream has more than one
    science variable, a panel plot is written to
    ``<sDir>/<array>/<subsite>/<refdes>/timeseries_panel_plots``.

    :param sDir: root directory for the saved plots.
    :param url_list: data URLs to process.
    :param start_time: start of the time window handed to ``ds.sel`` (or None).
    :param end_time: end of the time window handed to ``ds.sel`` (or None).
        The window is applied only when both bounds are set.
    :param preferred_only: the string ``'yes'`` restricts plotting to the
        preferred method/stream for each deployment; any other value plots
        every dataset found.
    """
    # Parse the reference designator of each URL once and remember the
    # order in which distinct designators first appear.
    url_rds = []
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        url_rds.append((uu, rd))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = list(itertools.chain.from_iterable(
            cf.get_nc_urls([u]) for u, rd in url_rds if rd == r))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # The cell is not a string (e.g. an empty entry for
                        # a deployment with fewer preferred streams).
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join(
                            (spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(
            main_sensor, fdatasets)

        for fd in fdatasets_sel:
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    fd)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        'timeseries_panel_plots')
                # Drop the last underscore-separated piece of the file name.
                filename = '_'.join(fname.split('_')[:-1])
                sci_vars = cf.return_science_vars(stream)

                if len(sci_vars) <= 1:
                    print(
                        'Only one science variable in file, no panel plots necessary'
                    )
                    continue

                cf.create_dir(save_dir)
                colors = cm.jet(np.linspace(0, 1, len(sci_vars)))

                t = ds['time'].values
                t0 = pd.to_datetime(t.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(t.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                # Plot data with outliers removed; the trailing 5 is passed
                # straight through to pf.plot_timeseries_panel.
                fig, ax = pf.plot_timeseries_panel(ds, t, sci_vars, colors,
                                                   5)
                plt.xticks(fontsize=7)
                ax[0].set_title((title + '\n' + t0 + ' - ' + t1),
                                fontsize=7)
                sfile = '-'.join((filename, 'timeseries_panel', t0[:10]))
                pf.save_fig(save_dir, sfile)
Ejemplo n.º 32
0
def main(folder, out, time_break):
    """
    Plot a timeseries and a depth cross-section for every science variable.

    folder: passed directly to xr.open_mfdataset (the netCDF file(s) to open)
    out: Directory to save plots
    time_break: name of a variable in the dataset; one set of plots is
        produced for each unique value of ds[time_break]
    """
    # define possible pressure variables
    pressure_vars = ['seawater_pressure', 'sci_water_pressure_dbar',
                     'ctdgv_m_glider_instrument_recovered-sci_water_pressure_dbar',
                     'ctdgv_m_glider_instrument-sci_water_pressure_dbar']
    rePressure = re.compile('|'.join(pressure_vars))

    with xr.open_mfdataset(folder, mask_and_scale=False) as ds:
        # change dimensions from 'obs' to 'time'
        ds = ds.swap_dims({'obs': 'time'})
        ds_variables = ds.data_vars.keys()  # List of dataset variables
        stream = ds.stream  # List stream name associated with the data
        title_pre = mk_str(ds.attrs, 't')  # , var, tt0, tt1, 't')
        save_pre = mk_str(ds.attrs, 's')  # , var, tt0, tt1, 's')
        platform = ds.subsite
        node = ds.node
        sensor = ds.sensor
        save_dir = os.path.join(out, ds.subsite, ds.node, ds.stream, 'timeseries')
        cf.create_dir(save_dir)
        try:
            eng = stream_vars[stream]  # select specific streams engineering variables
        except KeyError:
            eng = ['']

        misc = ['timestamp', 'provenance', 'qc', 'id', 'obs', 'deployment',
                'volts', 'counts', 'quality_flag']

        reg_ex = re.compile('|'.join(eng+misc))  # make regular expression

        #  keep variables that are not in the regular expression
        sci_vars = [s for s in ds_variables if not reg_ex.search(s)]

        times = np.unique(ds[time_break])

        for t in times:
            time_ind = t == ds[time_break].data
            for var in sci_vars:
                x = dict(data=ds['time'].data[time_ind],
                         info=dict(label='Time', units='GMT'))
                # NOTE(review): '%00' is not a standard strftime directive, so
                # its output is platform dependent; '%H%M00' may have been
                # intended. Left unchanged because it feeds the saved file
                # names - confirm before altering.
                t0 = pd.to_datetime(x['data'].min()).strftime('%Y-%m-%dT%H%M%00')
                t1 = pd.to_datetime(x['data'].max()).strftime('%Y-%m-%dT%H%M%00')
                try:
                    sci = ds[var]
                    print(var)
                except UnicodeEncodeError:  # some comments have latex characters
                    ds[var].attrs.pop('comment')  # remove from the attributes
                    sci = ds[var]  # or else the variable won't load

                # define y as pressure variable
                # NOTE(review): `sci` is ds[var]; confirm it really exposes
                # `.variables` (that attribute belongs to the Dataset, so this
                # lookup may have been meant for `ds`). The value bound here
                # is replaced a few lines below before anything reads it.
                pressure = [s for s in sci.variables if rePressure.search(s)]
                pressure = ''.join(pressure)
                y = sci.variables[pressure]

                try:
                    y_lab = sci.long_name
                except AttributeError:
                    y_lab = sci.standard_name
                y = dict(data=sci.data[time_ind], info=dict(label=y_lab, units=sci.units, var=var,
                                                            platform=platform, node=node, sensor=sensor))

                title = title_pre + var

                # plot timeseries with outliers
                fig, ax = pf.auto_plot(x, y, title, stdev=None, line_style='r-o', g_range=True)
                pf.resize(width=12, height=8.5)  # Resize figure

                save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')

                # plot z variable each time
                fig, ax = pf.depth_cross_section(x, y, title, stdev=1, line_style='r-o', g_range=True)

                pf.resize(width=12, height=8.5)  # Resize figure

                save_name = '{}-{}-{}_{}_{}-{}_outliers_removed'.format(platform, node, sensor, var, t0, t1)
                pf.save_fig(save_dir, save_name, res=150)  # Save figure
                plt.close('all')
            # x and y are rebound on the next pass, so no explicit `del` is
            # needed here (it raised NameError when sci_vars was empty).
Ejemplo n.º 33
0
def main(url_list, sDir, deployment_num, start_time, end_time, preferred_only, n_std, inpercentile, zcell_size, zdbar):
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd and 'ADCP' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            part_d = fd.split('/')[-1]
            print('\n{}'.format(part_d))
            ds = xr.open_dataset(fd, mask_and_scale=False)
            ds = ds.swap_dims({'obs': 'time'})

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]
            sci_vars = cf.return_science_vars(stream)

            # if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
            #     eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
            #     eng_url = [s for s in url_list if eng in s]
            #     if len(eng_url) == 1:
            #         eng_datasets = cf.get_nc_urls(eng_url)
            #         # filter out collocated datasets
            #         eng_dataset = [j for j in eng_datasets if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
            #         if len(eng_dataset) > 0:
            #             ds_eng = xr.open_dataset(eng_dataset[0], mask_and_scale=False)
            #             t_eng = ds_eng['time'].values
            #             m_water_depth = ds_eng['m_water_depth'].values
            #
            #             # m_altitude = glider height above seafloor
            #             # m_depth = glider depth in the water column
            #             # m_altitude = ds_eng['m_altitude'].values
            #             # m_depth = ds_eng['m_depth'].values
            #             # calc_water_depth = m_altitude + m_depth
            #
            #             # m_altimeter_status = 0 means a good reading (not nan or -1)
            #             try:
            #                 eng_ind = ds_eng['m_altimeter_status'].values == 0
            #             except KeyError:
            #                 eng_ind = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #
            #             m_water_depth = m_water_depth[eng_ind]
            #             t_eng = t_eng[eng_ind]
            #
            #             # get rid of any remaining nans or fill values
            #             eng_ind2 = (~np.isnan(m_water_depth)) & (m_water_depth >= 0)
            #             m_water_depth = m_water_depth[eng_ind2]
            #             t_eng = t_eng[eng_ind2]
            #         else:
            #             print('No engineering file for deployment {}'.format(deployment))
            #             m_water_depth = None
            #             t_eng = None
            #     else:
            #         m_water_depth = None
            #         t_eng = None
            # else:
            #     m_water_depth = None
            #     t_eng = None

            if deployment_num is not None:
                if int(int(deployment[-4:])) is not deployment_num:
                    print(type(int(deployment[-4:])), type(deployment_num))
                    continue

            if start_time is not None and end_time is not None:
                ds = ds.sel(time=slice(start_time, end_time))
                if len(ds['time'].values) == 0:
                    print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                    continue
                stime = start_time.strftime('%Y-%m-%d')
                etime = end_time.strftime('%Y-%m-%d')
                ext = stime + 'to' + etime  # .join((ds0_method, ds1_method
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment, ext)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment, ext)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment, ext)
            else:
                save_dir_profile = os.path.join(sDir, array, subsite, refdes, 'profile_plots', deployment)
                save_dir_xsection = os.path.join(sDir, array, subsite, refdes, 'xsection_plots', deployment)
                save_dir_4d = os.path.join(sDir, array, subsite, refdes, 'xsection_plots_4d', deployment)

            time1 = ds['time'].values
            try:
                ds_lat1 = ds['lat'].values
            except KeyError:
                ds_lat1 = None
                print('No latitude variable in file')
            try:
                ds_lon1 = ds['lon'].values
            except KeyError:
                ds_lon1 = None
                print('No longitude variable in file')

            # get pressure variable
            pvarname, y1, y_units, press, y_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(ds)

            for sv in sci_vars:
                print('')
                print(sv)
                if 'pressure' not in sv:
                    if sv == 'spkir_abj_cspp_downwelling_vector':
                        pxso.pf_xs_spkir(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile, save_dir_profile,
                                         save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    elif 'OPTAA' in r:
                        if sv not in ['wavelength_a', 'wavelength_c']:
                            pxso.pf_xs_optaa(ds, sv, time1, y1, ds_lat1, ds_lon1, zcell_size, inpercentile, save_dir_profile,
                                             save_dir_xsection, deployment, press, y_units, n_std, zdbar)
                    else:
                        z1 = ds[sv].values
                        fv = ds[sv]._FillValue
                        sv_units = ds[sv].units

                        # Check if the array is all NaNs
                        if sum(np.isnan(z1)) == len(z1):
                            print('Array of all NaNs - skipping plot.')
                            continue

                        # Check if the array is all fill values
                        elif len(z1[z1 != fv]) == 0:
                            print('Array of all fill values - skipping plot.')
                            continue

                        else:
                            # remove unreasonable pressure data (e.g. for surface piercing profilers)
                            if zdbar:
                                po_ind = (0 < y1) & (y1 < zdbar)
                                tm = time1[po_ind]
                                y = y1[po_ind]
                                z = z1[po_ind]
                                ds_lat = ds_lat1[po_ind]
                                ds_lon = ds_lon1[po_ind]
                            else:
                                tm = time1
                                y = y1
                                z = z1
                                ds_lat = ds_lat1
                                ds_lon = ds_lon1

                            # reject erroneous data
                            dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max, lat, lon = \
                                cf.reject_erroneous_data(r, sv, tm, y, z, fv, ds_lat, ds_lon)

                            # get rid of 0.0 data
                            if sv == 'salinity':
                                ind = ndata > 30
                            elif sv == 'density':
                                ind = ndata > 1022.5
                            elif sv == 'conductivity':
                                ind = ndata > 3.45
                            else:
                                ind = ndata > 0
                            # if sv == 'sci_flbbcd_chlor_units':
                            #     ind = ndata < 7.5
                            # elif sv == 'sci_flbbcd_cdom_units':
                            #     ind = ndata < 25
                            # else:
                            #     ind = ndata > 0.0

                            # if 'CTD' in r:
                            #     ind = zpressure > 0.0
                            # else:
                            #     ind = ndata > 0.0

                            lenzero = np.sum(~ind)
                            dtime = dtime[ind]
                            zpressure = zpressure[ind]
                            ndata = ndata[ind]
                            if ds_lat is not None and ds_lon is not None:
                                lat = lat[ind]
                                lon = lon[ind]
                            else:
                                lat = None
                                lon = None

                            if len(dtime) > 0:
                                # reject time range from data portal file export
                                t_portal, z_portal, y_portal, lat_portal, lon_portal = \
                                    cf.reject_timestamps_dataportal(subsite, r, dtime, zpressure, ndata, lat, lon)

                                print('removed {} data points using visual inspection of data'.format(
                                    len(ndata) - len(z_portal)))

                                # create data groups
                                if len(y_portal) > 0:
                                    columns = ['tsec', 'dbar', str(sv)]
                                    min_r = int(round(np.nanmin(y_portal) - zcell_size))
                                    max_r = int(round(np.nanmax(y_portal) + zcell_size))
                                    ranges = list(range(min_r, max_r, zcell_size))

                                    groups, d_groups = gt.group_by_depth_range(t_portal, y_portal, z_portal, columns, ranges)

                                    if 'scatter' in sv:
                                        n_std = None  # to use percentile
                                    else:
                                        n_std = n_std

                                    #  get percentile analysis for printing on the profile plot
                                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                                        groups, d_groups, n_std, inpercentile)

                            """
                            Plot all data
                            """
                            if len(time1) > 0:
                                cf.create_dir(save_dir_profile)
                                cf.create_dir(save_dir_xsection)
                                sname = '-'.join((r, method, sv))
                                sfileall = '_'.join(('all_data', sname, pd.to_datetime(time1.min()).strftime('%Y%m%d')))
                                tm0 = pd.to_datetime(time1.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                tm1 = pd.to_datetime(time1.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                title = ' '.join((deployment, refdes, method)) + '\n' + tm0 + ' to ' + tm1
                                if 'SPKIR' in r:
                                    title = title + '\nWavelength = 510 nm'

                                '''
                                profile plot
                                '''
                                xlabel = sv + " (" + sv_units + ")"
                                ylabel = press[0] + " (" + y_units[0] + ")"
                                clabel = 'Time'

                                fig, ax = pf.plot_profiles(z1, y1, time1, ylabel, xlabel, clabel, stdev=None)

                                ax.set_title(title, fontsize=9)
                                fig.tight_layout()
                                pf.save_fig(save_dir_profile, sfileall)

                                '''
                                xsection plot
                                '''
                                clabel = sv + " (" + sv_units + ")"
                                ylabel = press[0] + " (" + y_units[0] + ")"

                                fig, ax, bar = pf.plot_xsection(subsite, time1, y1, z1, clabel, ylabel, t_eng=None,
                                                                m_water_depth=None, inpercentile=None, stdev=None)

                                if fig:
                                    ax.set_title(title, fontsize=9)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_xsection, sfileall)

                            """
                            Plot cleaned-up data
                            """
                            if len(dtime) > 0:
                                if len(y_portal) > 0:
                                    sfile = '_'.join(('rm_erroneous_data', sname, pd.to_datetime(t_portal.min()).strftime('%Y%m%d')))
                                    t0 = pd.to_datetime(t_portal.min()).strftime('%Y-%m-%dT%H:%M:%S')
                                    t1 = pd.to_datetime(t_portal.max()).strftime('%Y-%m-%dT%H:%M:%S')
                                    title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1
                                    if 'SPKIR' in r:
                                        title = title + '\nWavelength = 510 nm'

                                    '''
                                    profile plot
                                    '''
                                    xlabel = sv + " (" + sv_units + ")"
                                    ylabel = press[0] + " (" + y_units[0] + ")"
                                    clabel = 'Time'

                                    fig, ax = pf.plot_profiles(z_portal, y_portal, t_portal, ylabel, xlabel, clabel, stdev=None)

                                    ax.set_title(title, fontsize=9)
                                    ax.plot(n_avg, y_avg, '-k')
                                    ax.fill_betweenx(y_avg, n0_std, n1_std, color='m', alpha=0.2)
                                    if inpercentile:
                                        leg_text = (
                                            'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                            '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                            '\nexcluded {} suspect data points when inspected visually'.format(
                                                len(ndata) - len(z_portal)) +
                                            '\n(black) data average in {} dbar segments'.format(zcell_size) +
                                            '\n(magenta) {} percentile envelope in {} dbar segments'.format(
                                                int(100 - inpercentile * 2), zcell_size),)
                                    elif n_std:
                                        leg_text = (
                                            'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                            '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max,
                                                              lenzero) +
                                            '\nexcluded {} suspect data points when inspected visually'.format(
                                                len(ndata) - len(z_portal)) +
                                            '\n(black) data average in {} dbar segments'.format(zcell_size) +
                                            '\n(magenta) +/- {} SD envelope in {} dbar segments'.format(
                                                int(n_std), zcell_size),)
                                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_profile, sfile)

                                    '''
                                    xsection plot
                                    '''
                                    clabel = sv + " (" + sv_units + ")"
                                    ylabel = press[0] + " (" + y_units[0] + ")"

                                    # plot non-erroneous data
                                    fig, ax, bar = pf.plot_xsection(subsite, t_portal, y_portal, z_portal, clabel, ylabel,
                                                                    t_eng=None, m_water_depth=None, inpercentile=None,
                                                                    stdev=None)

                                    ax.set_title(title, fontsize=9)
                                    leg_text = (
                                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                                        '{} unreasonable values'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero) +
                                        '\nexcluded {} suspect data points when inspected visually'.format(
                                            len(ndata) - len(z_portal)),
                                    )
                                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                                    fig.tight_layout()
                                    pf.save_fig(save_dir_xsection, sfile)

                                    '''
                                    4D plot for gliders only
                                    '''
                                    if 'MOAS' in r:
                                        if ds_lat is not None and ds_lon is not None:
                                            cf.create_dir(save_dir_4d)

                                            clabel = sv + " (" + sv_units + ")"
                                            zlabel = press[0] + " (" + y_units[0] + ")"

                                            fig = plt.figure()
                                            ax = fig.add_subplot(111, projection='3d')
                                            sct = ax.scatter(lon_portal, lat_portal, y_portal, c=z_portal, s=2)
                                            cbar = plt.colorbar(sct, label=clabel, extend='both')
                                            cbar.ax.tick_params(labelsize=8)
                                            ax.invert_zaxis()
                                            ax.view_init(25, 32)
                                            ax.invert_xaxis()
                                            ax.invert_yaxis()
                                            ax.set_zlabel(zlabel, fontsize=9)
                                            ax.set_ylabel('Latitude', fontsize=9)
                                            ax.set_xlabel('Longitude', fontsize=9)

                                            ax.set_title(title, fontsize=9)
                                            pf.save_fig(save_dir_4d, sfile)
def main(url_list, sDir, plot_type, deployment_num, start_time, end_time, preferred_only, glider, zdbar, n_std, inpercentile, zcell_size):
    """
    Create cross-section plots of the science variables found in the data files listed under each URL: one plot
    after removing erroneous values, and a second one after also removing suspect data (percentile analysis in
    pressure bins, time ranges flagged in the data portal export, and an optional pressure cutoff).

    url_list : list of URLs; the second-to-last path element of each URL is split on '-' and items 1-4 are
               joined to build the reference designator. Reference designators containing 'ENG' are skipped.
    sDir : path to the directory on your machine to save files
    plot_type : folder name for a plot type
    deployment_num : deployment number to plot, or None to plot every deployment
    start_time, end_time : when both are not None the data are restricted to this time slice; both must
                           provide .strftime()
    preferred_only : 'yes' to keep only the datasets listed in the preferred stream information
    glider : 'yes' to overlay the engineering water depth on the plots, 'no' to leave it out
    zdbar : when truthy, data at pressure >= zdbar are excluded from the suspect-data plot
    n_std, inpercentile : passed through to cf.reject_timestamps_in_groups (inpercentile also goes to
                          pf.plot_xsection)
    zcell_size : step (integer, it is used in range()) of the pressure bins used for grouping the data
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list and 'ENG' not in rd:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))

        # collect the data files that belong to this reference designator
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for index, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # the cell does not hold a string, so there is no stream to match
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        main_sensor = r.split('-')[-1]
        fdatasets_sel = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets_sel:
            print(fd.split('/')[-1])

            fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
            array = subsite[0:2]

            # Skip unwanted deployments before any file is opened. The number is read from the trailing
            # digits of the deployment string (splitting on '0' breaks on numbers such as 10 or 102) and is
            # compared by value rather than by identity.
            if deployment_num is not None:
                deploy_digits = deployment[len(deployment.rstrip('0123456789')):]
                if int(deploy_digits) != deployment_num:
                    continue

            # Bathymetry is reset for every dataset so that a missing engineering file can neither raise a
            # NameError further down nor reuse the values loaded for a previous dataset.
            t_eng = None
            m_water_depth = None
            if 'CE05MOAS' in r or 'CP05MOAS' in r:  # for coastal gliders, get m_water_depth for bathymetry
                eng = '-'.join((r.split('-')[0], r.split('-')[1], '00-ENG000000', method, 'glider_eng'))
                eng_url = [s for s in url_list if eng in s]
                if len(eng_url) == 1:
                    eng_datasets = cf.get_nc_urls(eng_url)
                    # filter out collocated datasets
                    eng_dataset = [j for j in eng_datasets
                                   if (eng in j.split('/')[-1] and deployment in j.split('/')[-1])]
                    if len(eng_dataset) > 0:
                        with xr.open_dataset(eng_dataset[0], mask_and_scale=False) as ds_eng:
                            # m_altimeter_status = 0 means a good reading (not nan or -1)
                            eng_ind = ds_eng['m_altimeter_status'].values == 0
                            t_eng = ds_eng['time'].values[eng_ind]
                            m_water_depth = ds_eng['m_water_depth'].values[eng_ind]
                    else:
                        print('No engineering file for deployment {}'.format(deployment))

            # the context manager closes the file on every exit path, including the 'continue' statements
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                sci_vars = cf.return_science_vars(stream)

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue
                    stime = start_time.strftime('%Y-%m-%d')
                    etime = end_time.strftime('%Y-%m-%d')
                    ext = stime + 'to' + etime
                    save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment, ext)
                else:
                    save_dir = os.path.join(sDir, array, subsite, refdes, plot_type, deployment)

                cf.create_dir(save_dir)

                tm = ds['time'].values

                # get pressure variable
                y, y_units, press = cf.add_pressure_to_dictionary_of_sci_vars(ds)
                print(y_units, press)

                for sv in sci_vars:
                    print(sv)
                    if 'sci_water_pressure' in sv:
                        continue

                    z = ds[sv].values
                    fv = ds[sv]._FillValue
                    z_units = ds[sv].units

                    # Check if the array is all NaNs
                    if np.all(np.isnan(z)):
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    if not np.any(z != fv):
                        print('Array of all fill values - skipping plot.')
                        continue

                    """
                    clean up data
                    """
                    # reject erroneous data
                    dtime, zpressure, ndata, lenfv, lennan, lenev, lengr, global_min, global_max = \
                        cf.reject_erroneous_data(r, sv, tm, y, z, fv)

                    # get rid of 0.0 data
                    if 'CTD' in r:
                        ind = zpressure > 0.0
                    else:
                        ind = ndata > 0.0

                    lenzero = np.sum(~ind)
                    dtime = dtime[ind]
                    zpressure = zpressure[ind]
                    ndata = ndata[ind]

                    # nothing is left to group or plot; min()/max() below would fail on an empty array
                    if len(dtime) == 0:
                        print('No data left after removing erroneous data - skipping plot.')
                        continue

                    # creating data groups
                    columns = ['tsec', 'dbar', str(sv)]
                    min_r = int(round(np.nanmin(zpressure) - zcell_size))
                    max_r = int(round(np.nanmax(zpressure) + zcell_size))
                    ranges = list(range(min_r, max_r, zcell_size))

                    groups, d_groups = gt.group_by_depth_range(dtime, zpressure, ndata, columns, ranges)

                    #  rejecting timestamps from percentile analysis
                    y_avg, n_avg, n_min, n_max, n0_std, n1_std, l_arr, time_ex = cf.reject_timestamps_in_groups(
                        groups, d_groups, n_std, inpercentile)

                    t_nospct, z_nospct, y_nospct = cf.reject_suspect_data(dtime, zpressure, ndata, time_ex)

                    print('removed {} data points using {} percentile of data grouped in {} dbar segments'.format(
                        len(zpressure) - len(z_nospct), inpercentile, zcell_size))

                    # reject time range from data portal file export
                    t_portal, z_portal, y_portal = cf.reject_timestamps_dataportal(subsite, r,
                                                                                   t_nospct, y_nospct, z_nospct)
                    print('removed {} data points using visual inspection of data'.format(
                        len(z_nospct) - len(z_portal)))

                    # reject data in a depth range
                    if zdbar:
                        y_ind = y_portal < zdbar
                        n_zdbar = np.sum(~y_ind)
                        t_array = t_portal[y_ind]
                        y_array = y_portal[y_ind]
                        z_array = z_portal[y_ind]
                    else:
                        n_zdbar = 0
                        t_array = t_portal
                        y_array = y_portal
                        z_array = z_portal
                    print('{} in water depth > {} dbar'.format(n_zdbar, zdbar))

                    """
                    Plot data
                    """
                    sname = '-'.join((r, method, sv))

                    clabel = sv + " (" + z_units + ")"
                    ylabel = press[0] + " (" + y_units[0] + ")"

                    # Per-variable copies of the bathymetry: clipping them for one variable must not shrink
                    # the record that the following variables start from.
                    if glider == 'no':
                        bathy_t = None
                        bathy_depth = None
                    else:
                        bathy_t = t_eng
                        bathy_depth = m_water_depth

                    # plot non-erroneous data
                    fig, ax, bar = pf.plot_xsection(subsite, dtime, zpressure, ndata, clabel, ylabel,
                                                    bathy_t, bathy_depth, inpercentile, stdev=None)

                    t0 = pd.to_datetime(dtime.min()).strftime('%Y-%m-%dT%H:%M:%S')
                    t1 = pd.to_datetime(dtime.max()).strftime('%Y-%m-%dT%H:%M:%S')
                    title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                    ax.set_title(title, fontsize=9)
                    removed_text = (
                        'removed {} fill values, {} NaNs, {} Extreme Values (1e7), {} Global ranges [{} - {}], '
                        '{} zeros'.format(lenfv, lennan, lenev, lengr, global_min, global_max, lenzero)
                    )
                    leg_text = (removed_text,)
                    ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                    fig.tight_layout()
                    sfile = '_'.join(('rm_erroneous_data', sname))
                    pf.save_fig(save_dir, sfile)

                    # plots removing all suspect data (only when something was actually removed)
                    if len(t_array) > 0 and len(t_array) != len(dtime):
                        # plot bathymetry only within data time ranges
                        if glider == 'yes' and bathy_t is not None:
                            eng_ind = (bathy_t >= np.min(t_array)) & (bathy_t <= np.max(t_array))
                            bathy_t = bathy_t[eng_ind]
                            bathy_depth = bathy_depth[eng_ind]

                        fig, ax, bar = pf.plot_xsection(subsite, t_array, y_array, z_array, clabel, ylabel,
                                                        bathy_t, bathy_depth, inpercentile, stdev=None)

                        t0 = pd.to_datetime(t_array.min()).strftime('%Y-%m-%dT%H:%M:%S')
                        t1 = pd.to_datetime(t_array.max()).strftime('%Y-%m-%dT%H:%M:%S')
                        title = ' '.join((deployment, refdes, method)) + '\n' + t0 + ' to ' + t1

                        ax.set_title(title, fontsize=9)
                        suspect_text = (
                            removed_text
                            + '\nremoved {} in the upper and lower {}th percentile of data grouped in {} dbar segments'.format(
                                len(zpressure) - len(z_nospct), inpercentile, zcell_size)
                            + '\nexcluded {} suspect data points when inspected visually'.format(
                                len(z_nospct) - len(z_portal))
                        )
                        if zdbar:
                            suspect_text += '\nexcluded {} suspect data in water depth greater than {} dbar'.format(
                                n_zdbar, zdbar)
                        leg_text = (suspect_text,)
                        ax.legend(leg_text, loc='upper center', bbox_to_anchor=(0.5, -0.17), fontsize=6)
                        fig.tight_layout()

                        sfile = '_'.join(('rm_suspect_data', sname))
                        pf.save_fig(save_dir, sfile)
Ejemplo n.º 35
0
def main(files, out, time_break, depth):
    """
    Plot a depth cross-section of every science variable in each data file, one plot per unique value of the
    time_break variable.

    files: url to an .nc/.ncml file or the path to a text file containing .nc/.ncml links. A # at the front will skip links in the text file.
    out: Directory to save plots
    time_break: name of the dataset variable whose unique values split the data into separate plots
    depth: name of the dataset variable plotted on the pressure axis
    """
    ext = os.path.splitext(files)[1]
    # Exact match on the extension: the former substring test ("ext in '.nc'") was also true for an empty
    # extension, so a link file without an extension was opened as if it were a NetCDF file.
    if ext in ('.nc', '.ncml'):
        list_files = [files]
    else:
        list_files = read_file(files)

    # variables whose name contains any of these fragments are not plotted
    misc = ['quality', 'string', 'timestamp', 'deployment', 'id', 'provenance', 'qc',  'time', 'mission', 'obs',
            'volt', 'ref', 'sig', 'amp', 'rph', 'calphase', 'phase', 'therm']
    reg_ex = re.compile('|'.join(misc))  # compiled once, it does not depend on the file

    for nc in list_files:
        print(nc)
        with xr.open_dataset(nc, mask_and_scale=False) as ds:
            # change dimensions from 'obs' to 'time'
            ds = ds.swap_dims({'obs': 'time'})
            title_pre = mk_str(ds.attrs, 't')
            platform = ds.subsite
            node = ds.node
            sensor = ds.sensor
            deployment = 'D0000{}'.format(str(np.unique(ds.deployment)[0]))
            stream = ds.stream
            save_dir = os.path.join(out, platform, deployment, node, sensor, stream, 'depth_profiles')
            cf.create_dir(save_dir)

            #  keep variables that are not in the regular expression
            sci_vars = [s for s in ds.data_vars.keys() if not reg_ex.search(s)]

            break_values = ds[time_break].data
            for t in np.unique(ds[time_break]):
                time_ind = t == break_values

                # The time and depth selections only depend on the current time_break value, so they are
                # computed once here instead of once per variable.
                time_data = ds['time'].data[time_ind]
                depth_data = ds[depth].data[time_ind]
                # '%S' replaces the former '%00', which is not a valid strftime directive and is handled
                # differently from one platform to another.
                t0 = pd.to_datetime(time_data.min()).strftime('%Y-%m-%dT%H%M%S')
                t1 = pd.to_datetime(time_data.max()).strftime('%Y-%m-%dT%H%M%S')

                for var in sci_vars:
                    x = dict(data=time_data, info=dict(label='Time', units='GMT'))
                    try:
                        sci = ds[var]
                        print(var)
                    except UnicodeEncodeError:  # some comments have latex characters
                        ds[var].attrs.pop('comment')  # remove from the attributes
                        sci = ds[var]  # or else the variable won't load

                    y = dict(data=depth_data, info=dict(label='Pressure', units='dbar', var=var,
                                                        platform=platform, node=node, sensor=sensor))

                    # fall back to the standard name when the variable has no long name
                    try:
                        z_lab = sci.long_name
                    except AttributeError:
                        z_lab = sci.standard_name
                    z = dict(data=sci.data[time_ind], info=dict(label=z_lab, units=sci.units, var=var,
                                                                platform=platform, node=node, sensor=sensor))

                    title = title_pre + var

                    # plot timeseries with outliers
                    fig, ax = pf.depth_cross_section(z, y, x, title=title)
                    pf.resize(width=12, height=8.5)  # Resize figure

                    save_name = '{}-{}-{}_{}_{}-{}'.format(platform, node, sensor, var, t0, t1)
                    pf.save_fig(save_dir, save_name, res=150)  # Save figure
                    plt.close('all')
def main(url_list, sDir, plot_type):
    """
    Plot science variables against pressure (profile plots) for every
    reference designator and method-stream found in url_list.

    For each science variable two figures are saved: one with all data that
    survived the fill-value / NaN / extreme-value / global-range filters
    (with a binned average and standard-deviation envelope overlaid, except
    for the variable named 'pressure'), and one produced by calling
    pf.plot_profiles with stdev=5 (file name suffixed '_rmoutliers').

    url_list : paths to instrument data by methods; the second-to-last
               '/'-separated component of each entry is parsed for the
               reference designator and the method-stream
    sDir : path to the directory on your machine to save files
    plot_type: folder name for a plot type

    Figures are written to sDir/<array>/<subsite>/<refdes>/<plot_type>/<method>.
    """
    bin_size = 10  # width of the pressure bins used for the average overlay
    n_std = 3  # half-width of the shaded envelope, in standard deviations
    time_fmt = '%Y-%m-%dT%H:%M:%S'

    # collect the unique reference designators and method-streams
    rd_list = []
    ms_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        ms = uu.split(rd + '-')[1].split('/')[0]
        if rd not in rd_list:
            rd_list.append(rd)
        if ms not in ms_list:
            ms_list.append(ms)

    # separate different instruments
    for r in rd_list:
        print('\n{}'.format(r))
        subsite = r.split('-')[0]
        array = subsite[0:2]
        main_sensor = r.split('-')[-1]

        ps_df, _ = cf.get_preferred_stream_info(r)

        # read in the analysis file
        dr_data = cf.refdes_datareview_json(r)

        # get end times of deployments
        deployments = []
        end_times = []
        for _, row in ps_df.iterrows():
            deploy_num = int(row['deployment'][-4:])
            deploy_info = get_deployment_information(dr_data, deploy_num)
            deployments.append(deploy_num)
            end_times.append(pd.to_datetime(deploy_info['stop_date']))

        # get the list of data files and filter out collocated instruments
        # and other streams
        datasets = []
        for u in url_list:
            print(u)
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join(
                (splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                datasets.extend(cf.get_nc_urls([u]))

        fdatasets = cf.filter_collocated_instruments(main_sensor, datasets)
        fdatasets = cf.filter_other_streams(r, ms_list, fdatasets)

        # separate the data files by methods
        for ms in ms_list:
            fdatasets_sel = [f for f in fdatasets if ms in f]

            # create a folder to save figures
            save_dir = os.path.join(sDir, array, subsite, r, plot_type,
                                    ms.split('-')[0])
            cf.create_dir(save_dir)

            # create a dictionary for science variables from analysis file
            stream_sci_vars_dict = dict()
            for stream in dr_data['instrument']['data_streams']:
                dr_ms = '-'.join((stream['method'], stream['stream_name']))
                if ms == dr_ms:
                    stream_sci_vars_dict[dr_ms] = dict(vars=dict())
                    sci_vars = dict()
                    for param in stream['stream']['parameters']:
                        if param['data_product_type'] == 'Science Data':
                            sci_vars.update(
                                {param['name']: dict(db_units=param['unit'])})
                    if len(sci_vars) > 0:
                        stream_sci_vars_dict[dr_ms]['vars'] = sci_vars

            # initialize an empty data array for science variables in
            # dictionary; the 't', 'values', 'pressure', 'fv' and 'units'
            # entries read below are expected to be created by this call
            sci_vars_dict = cd.initialize_empty_arrays(stream_sci_vars_dict,
                                                       ms)

            y_unit = []
            y_name = []
            for fd in fdatasets_sel:
                # context manager so the file handle is released once the
                # values have been copied out of the dataset
                with xr.open_dataset(fd, mask_and_scale=False) as ds:
                    print('\nAppending data file: {}'.format(
                        fd.split('/')[-1]))
                    for var in list(sci_vars_dict[ms]['vars'].keys()):
                        sh = sci_vars_dict[ms]['vars'][var]
                        # only keep data whose units agree with the
                        # analysis file
                        if ds[var].units != sh['db_units']:
                            continue

                        if ds[var]._FillValue not in sh['fv']:
                            sh['fv'].append(ds[var]._FillValue)
                        if ds[var].units not in sh['units']:
                            sh['units'].append(ds[var].units)

                        # time and science variable
                        sh['t'] = np.append(sh['t'], ds['time'].values)
                        sh['values'] = np.append(sh['values'],
                                                 ds[var].values)

                        # add pressure to dictionary of sci vars
                        if 'MOAS' in subsite:
                            if 'CTD' in main_sensor:
                                # for glider CTDs, pressure is a coordinate
                                pressure = 'sci_water_pressure_dbar'
                            else:
                                pressure = 'int_ctd_pressure'
                        else:
                            pressure = pf.pressure_var(ds,
                                                       ds.data_vars.keys())

                        if ds[pressure].units not in y_unit:
                            y_unit.append(ds[pressure].units)
                        if ds[pressure].long_name not in y_name:
                            y_name.append(ds[pressure].long_name)

                        sh['pressure'] = np.append(sh['pressure'],
                                                   ds[pressure].values)

            if len(y_unit) != 1:
                print('pressure unit varies!')
            if len(y_name) != 1:
                print('pressure long name varies!')

            # Join instead of indexing so the label is always a string, even
            # when zero or several distinct names/units were collected.
            ylabel = ('/'.join(str(nm) for nm in y_name) + " (" +
                      '/'.join(str(un) for un in y_unit) + ")")

            for m, n in sci_vars_dict.items():
                for sv, vinfo in n['vars'].items():
                    print('\nWorking on variable: {}'.format(sv))
                    if len(vinfo['t']) < 1:
                        print('no variable data to plot')
                        continue

                    sv_units = vinfo['units'][0]
                    fv = vinfo['fv'][0]
                    t = vinfo['t']
                    x = vinfo['values']
                    y = vinfo['pressure']
                    t0 = pd.to_datetime(np.min(t)).strftime(time_fmt)
                    t1 = pd.to_datetime(np.max(t)).strftime(time_fmt)

                    # Check if the array is all NaNs
                    if np.isnan(x).all():
                        print('Array of all NaNs - skipping plot.')
                        continue

                    # Check if the array is all fill values
                    fv_ind = x != fv
                    if not fv_ind.any():
                        print('Array of all fill values - skipping plot.')
                        continue

                    # reject fill values
                    t_nofv = t[fv_ind]
                    y_nofv = y[fv_ind]
                    x_nofv = x[fv_ind]
                    print(len(x) - len(x_nofv), ' fill values')

                    # reject NaNs; the mask is built from the already
                    # filtered values so its length matches the arrays
                    nan_ind = ~np.isnan(x_nofv)
                    t_nofv_nonan = t_nofv[nan_ind]
                    y_nofv_nonan = y_nofv[nan_ind]
                    x_nofv_nonan = x_nofv[nan_ind]
                    print(len(x_nofv) - len(x_nofv_nonan), ' NaNs')

                    # reject extreme values
                    ev_ind = cf.reject_extreme_values(x_nofv_nonan)
                    t_nofv_nonan_noev = t_nofv_nonan[ev_ind]
                    y_nofv_nonan_noev = y_nofv_nonan[ev_ind]
                    x_nofv_nonan_noev = x_nofv_nonan[ev_ind]
                    print(len(x_nofv_nonan) - len(x_nofv_nonan_noev),
                          ' Extreme Values', '|1e7|')

                    # reject values outside global ranges:
                    global_min, global_max = cf.get_global_ranges(r, sv)
                    print('global ranges for : {}-{}  {} - {}'.format(
                        r, sv, global_min, global_max))
                    if isinstance(global_min, (int, float)) and isinstance(
                            global_max, (int, float)):
                        gr_ind = cf.reject_global_ranges(
                            x_nofv_nonan_noev, global_min, global_max)
                        t_nofv_nonan_noev_nogr = t_nofv_nonan_noev[gr_ind]
                        y_nofv_nonan_noev_nogr = y_nofv_nonan_noev[gr_ind]
                        x_nofv_nonan_noev_nogr = x_nofv_nonan_noev[gr_ind]
                    else:
                        # no numeric global range available: keep everything
                        t_nofv_nonan_noev_nogr = t_nofv_nonan_noev
                        y_nofv_nonan_noev_nogr = y_nofv_nonan_noev
                        x_nofv_nonan_noev_nogr = x_nofv_nonan_noev

                    if len(x_nofv_nonan_noev_nogr) == 0:
                        print('No data left after filtering - skipping plot.')
                        continue

                    if m == 'common_stream_placeholder':
                        sname = '-'.join((r, sv))
                    else:
                        sname = '-'.join((r, m, sv))

                    # binned statistics (not computed for the variable
                    # named 'pressure')
                    binned = sv != 'pressure'
                    y_avg, n_avg, n0_std, n1_std = [], [], [], []
                    if binned:
                        columns = ['tsec', 'dbar', str(sv)]
                        min_r = int(round(
                            np.nanmin(y_nofv_nonan_noev) - bin_size))
                        max_r = int(round(
                            np.nanmax(y_nofv_nonan_noev) + bin_size))
                        ranges = list(range(min_r, max_r, bin_size))
                        groups, d_groups = gt.group_by_depth_range(
                            t_nofv_nonan_noev_nogr, y_nofv_nonan_noev_nogr,
                            x_nofv_nonan_noev_nogr, columns, ranges)

                        for ii in range(len(groups)):
                            # d_groups is indexed in runs of three
                            # consecutive integer labels per bin, starting
                            # at 1 (presumably time, pressure, value, in
                            # the order of `columns` - confirm in gt)
                            first_col = 3 * ii + 1
                            valid = d_groups[first_col].notnull()
                            ypres = d_groups[first_col + 1][valid]
                            nval = d_groups[first_col + 2][valid]

                            n_mean = nval.mean()
                            n_spread = n_std * nval.std()
                            y_avg.append(ypres.mean())
                            n_avg.append(n_mean)
                            n0_std.append(n_mean + n_spread)
                            n1_std.append(n_mean - n_spread)

                    xlabel = sv + " (" + sv_units + ")"
                    clabel = 'Time'
                    base_title = ' '.join((r, ms.split('-')[-1])) + '\n' \
                                 + t0 + ' - ' + t1

                    # Plot all data
                    fig, ax = pf.plot_profiles(x_nofv_nonan_noev_nogr,
                                               y_nofv_nonan_noev_nogr,
                                               t_nofv_nonan_noev_nogr,
                                               ylabel,
                                               xlabel,
                                               clabel,
                                               end_times,
                                               deployments,
                                               stdev=None)
                    if binned:
                        title_text = base_title + '\n' + str(bin_size) + \
                                     ' m average and ' + str(n_std) + \
                                     ' std shown'
                        ax.set_title(title_text, fontsize=9)
                        ax.plot(n_avg, y_avg, '-k')
                        ax.fill_betweenx(y_avg,
                                         n0_std,
                                         n1_std,
                                         color='m',
                                         alpha=0.2)
                    else:
                        ax.set_title(base_title, fontsize=9)
                    pf.save_fig(save_dir, sname)

                    # Plot data with outliers removed
                    fig, ax = pf.plot_profiles(x_nofv_nonan_noev_nogr,
                                               y_nofv_nonan_noev_nogr,
                                               t_nofv_nonan_noev_nogr,
                                               ylabel,
                                               xlabel,
                                               clabel,
                                               end_times,
                                               deployments,
                                               stdev=5)
                    ax.set_title(base_title, fontsize=9)
                    sfile = '_'.join((sname, 'rmoutliers'))
                    pf.save_fig(save_dir, sfile)