Python return_raw_vars Examples

Programming Language: Python

Namespace/Package Name: functions.common

Method/Function: return_raw_vars

Examples at hotexamples.com: 6

Python return_raw_vars - 6 examples found. These are the top-rated real-world Python examples of functions.common.return_raw_vars, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def _extend_entry(entry, key, data):
    """Append ``data`` to the array stored at ``entry[key]`` (via ``np.append``)."""
    entry[key] = np.append(entry[key], data)


def append_variable_data(ds, variable_dict, common_stream_name, exclude_times):
    """Accumulate the data of one dataset into ``variable_dict``.

    For every name returned by ``cf.return_raw_vars`` the variable's
    ``long_name`` is matched (substring match) against the keys of
    ``variable_dict[common_stream_name]['vars']``.  When a key matches and the
    variable's ``units`` equal that entry's ``'db_units'``, the time,
    pressure, value and deployment arrays are appended to the entry.
    Variables with more than one dimension are stored per row of the
    transposed array in a dict under ``'values'``.

    :param ds: dataset object providing ``data_vars``, ``coords`` and the
        ``'time'`` / ``'deployment'`` variables (presumably an
        ``xarray.Dataset`` -- confirm with the caller)
    :param variable_dict: nested dictionary that is updated in place
    :param common_stream_name: key selecting the stream inside ``variable_dict``
    :param exclude_times: sized iterable whose items are handed one by one to
        ``exclude_time_ranges``; an empty one disables the exclusion step
    :return: ``(variable_dict, pressure_unit, pressure_name)`` where the last
        two are lists of the distinct pressure units / names encountered
    """
    pressure_unit, pressure_name = [], []
    ds_vars = cf.return_raw_vars(list(ds.data_vars.keys()) + list(ds.coords))
    vars_dict = variable_dict[common_stream_name]['vars']

    print('\nPARAMETERS: ')
    for var in ds_vars:
        # Any missing attribute (long_name, units, _FillValue, ...) skips the variable.
        try:
            long_name = ds[var].long_name
            matches = [key for key in list(vars_dict.keys()) if long_name in key]
            if len(matches) == 0:
                continue
            long_name = matches[0]
            entry = vars_dict[long_name]
            if not ds[var].units == entry['db_units']:
                continue

            print('______', long_name)
            if ds[var]._FillValue not in entry['fv']:
                entry['fv'].append(ds[var]._FillValue)
            if ds[var].units not in entry['units']:
                entry['units'].append(ds[var].units)
            tD = ds['time'].values
            varD = ds[var].values
            deployD = ds['deployment'].values

            # find the pressure to use from the data file
            pvar, pD, p_unit, p_name, p_fv = cf.add_pressure_to_dictionary_of_sci_vars(ds)
            if p_unit not in pressure_unit:
                pressure_unit.append(p_unit)
            if p_name not in pressure_name:
                pressure_name.append(p_name)

            if len(ds[var].dims) == 1:
                excluding = len(exclude_times) > 0
                if excluding:
                    for et in exclude_times:
                        tD, pD, varD, deployD = exclude_time_ranges(
                            tD, pD, varD, deployD, et)
                # After an exclusion pass nothing is appended if no data remain.
                if not excluding or len(tD) > 0:
                    _extend_entry(entry, 't', tD)
                    _extend_entry(entry, 'pressure', pD)
                    _extend_entry(entry, 'values', varD)
                    _extend_entry(entry, 'deployments', deployD)
                continue

            # appending 2D datasets: 'values' becomes a dict keyed by row index
            if type(entry['values']) is not dict:
                entry.pop('values')
                entry.update({'values': dict()})
            varD = varD.T

            # for presf_wave_burst data, telemetered and recovered_host pressure data have a matrix of 20,
            # while recovered_inst data have a matrix of 1024. for DCL data, whatever is above 20 will
            # be an array of nans as placeholders (so the indices match between DCL and recovered_inst)
            if common_stream_name == 'presf_abc_wave_burst':
                lendims = 1024
            else:
                lendims = len(varD)

            for i in range(lendims):
                # start every row from the unfiltered time/deployment/pressure arrays
                tD = ds['time'].values
                deployD = ds['deployment'].values
                pDi = pD
                row_values = entry['values']
                if i not in row_values:
                    row_values[i] = np.array([])
                try:
                    varDi = varD[i]
                except IndexError:
                    # row missing in this file: NaN placeholder shaped like time
                    varDi = np.full(np.shape(tD), np.nan)

                excluding = len(exclude_times) > 0
                if excluding:
                    for et in exclude_times:
                        tD, pDi, varDi, deployD = exclude_time_ranges(
                            tD, pDi, varDi, deployD, et)
                    if not len(tD) > 0:
                        continue

                # time and pressure are stored once, together with row 0
                if i == 0:
                    _extend_entry(entry, 't', tD)
                    _extend_entry(entry, 'pressure', pDi)
                row_values[i] = np.append(row_values[i], varDi)
                if i == 0:
                    _extend_entry(entry, 'deployments', deployD)
        except AttributeError:
            continue

    return variable_dict, pressure_unit, pressure_name

Example #2

Show file

def append_evaluated_data(sDir, deployment, ds, variable_dict,
                          common_stream_name, zdbar):
    """Filter the data of one dataset and accumulate it into ``variable_dict``.

    For every name returned by ``cf.return_raw_vars`` the variable's
    ``long_name`` is matched (substring match) against the keys of
    ``variable_dict[common_stream_name]['vars']``.  When a key matches and the
    variable's ``units`` equal that entry's ``'db_units'``, the data are passed
    through the ``reject_*`` helpers in sequence and whatever remains is
    appended to the entry's ``'t'``, ``'pressure'``, ``'values'`` and
    ``'deployments'`` arrays.

    :param sDir: base directory used to build the ``time_to_exclude`` path
    :param deployment: passed through to ``reject_timestamps_from_stat_analysis``
    :param ds: dataset object exposing ``subsite``, ``node``, ``sensor``,
        ``data_vars``, ``coords`` (presumably an ``xarray.Dataset`` -- confirm)
    :param variable_dict: nested dictionary that is updated in place
    :param common_stream_name: key selecting the stream inside ``variable_dict``
    :param zdbar: passed through to ``reject_data_in_depth_range`` and echoed
        in the printed summary as a pressure in dbar
    :return: ``(variable_dict, pressure_unit, pressure_name, total_len)`` where
        ``total_len`` is the summed number of time stamps, before any
        rejection, of the variables that were processed
    """
    pressure_unit, pressure_name = [], []
    r = '{}-{}-{}'.format(ds.subsite, ds.node, ds.sensor)
    ds_vars = cf.return_raw_vars(list(ds.data_vars.keys()) + list(ds.coords))
    vars_dict = variable_dict[common_stream_name]['vars']
    total_len = 0
    for var in ds_vars:
        # Any missing attribute (long_name, units, _FillValue, ...) skips the variable.
        try:
            long_name = ds[var].long_name
            x = [x for x in list(vars_dict.keys()) if long_name in x]
            if len(x) != 0:
                long_name = x[0]
                if ds[var].units == vars_dict[long_name]['db_units']:
                    print('\n' + var)
                    if ds[var]._FillValue not in vars_dict[long_name]['fv']:
                        vars_dict[long_name]['fv'].append(ds[var]._FillValue)
                    if ds[var].units not in vars_dict[long_name]['units']:
                        vars_dict[long_name]['units'].append(ds[var].units)
                    tD = ds['time'].values
                    varD = ds[var].values
                    deployD = ds['deployment'].values

                    # find the pressure to use from the data file
                    pvarname, pD, p_unit, p_name, p_fillvalue = cf.add_pressure_to_dictionary_of_sci_vars(
                        ds)
                    if p_unit not in pressure_unit:
                        pressure_unit.append(p_unit)
                    if p_name not in pressure_name:
                        pressure_name.append(p_name)

                    l0 = len(tD)
                    # reject erroneous data
                    tD, pD, varD, deployD = reject_erroneous_data(
                        r, var, tD, pD, varD, deployD, ds[var]._FillValue)
                    l_erroneous = len(tD)
                    print('{} erroneous data'.format(l0 - l_erroneous))

                    if l_erroneous != 0:
                        # reject time range from data portal file export
                        tD, pD, varD, deployD = reject_timestamps_data_portal(
                            ds.subsite, r, tD, pD, varD, deployD)
                        l_portal = len(tD)
                        print('{} suspect  - data portal'.format(l_erroneous -
                                                                 l_portal))

                        if l_portal != 0:
                            # reject timestamps from stat analysis
                            Dpath = '{}/{}/{}/{}/{}'.format(
                                sDir, ds.subsite[0:2], ds.subsite, r,
                                'time_to_exclude')
                            tD, pD, varD, deployD = reject_timestamps_from_stat_analysis(
                                Dpath, deployment, var, tD, pD, varD, deployD)
                            l_stat = len(tD)
                            print(
                                '{} suspect  - stat analysis'.format(l_portal -
                                                                     l_stat))

                            # reject timestamps in a depth range
                            tD, pD, varD, deployD = reject_data_in_depth_range(
                                tD, pD, varD, deployD, zdbar)
                            l_zrange = len(tD)
                            print('{} suspect - water depth > {} dbar'.format(
                                l_stat - l_zrange, zdbar))
                        else:
                            print(
                                'suspect data - rejected all, see data portal')
                    else:
                        print('erroneous data - rejected all')

                    vars_dict[long_name]['t'] = np.append(
                        vars_dict[long_name]['t'], tD)
                    vars_dict[long_name]['pressure'] = np.append(
                        vars_dict[long_name]['pressure'], pD)
                    vars_dict[long_name]['values'] = np.append(
                        vars_dict[long_name]['values'], varD)
                    vars_dict[long_name]['deployments'] = np.append(
                        vars_dict[long_name]['deployments'], deployD)

                    # Count only variables that were actually processed: l0 is
                    # assigned in this branch only, so adding it outside would
                    # hit an unbound name or re-add a stale count from an
                    # earlier variable when the units do not match.
                    total_len += l0

        except AttributeError:
            continue

    return variable_dict, pressure_unit, pressure_name, total_len

Example #3

Show file

File: plot_ts.py Project: ooi-data-lab/data-review-tools

def _mask_all(mask, *arrays):
    """Index every array in ``arrays`` with the same ``mask``; return them as a tuple."""
    return tuple(arr[mask] for arr in arrays)


def main(sDir, url_list, start_time, end_time, preferred_only):
    """Create a temperature-salinity plot for every selected dataset.

    Reference designators are parsed from the second-to-last path element of
    each url.  For each one the NetCDF urls are collected with
    ``cf.get_nc_urls``, optionally restricted to the preferred streams, and
    each dataset is opened, filtered (NaNs, zeros, fill values, global ranges,
    outliers) and plotted with ``pf.plot_ts`` over a grid of densities
    computed by ``gsw.density.rho`` at the median pressure.

    :param sDir: base directory under which the ``ts_plots`` folders are created
    :param url_list: list of urls whose second-to-last path element is a
        '-'-separated string containing the reference designator
    :param start_time: start of the time slice, or None (both bounds must be
        given for the slice to be applied)
    :param end_time: end of the time slice, or None
    :param preferred_only: the string 'yes' restricts the datasets to those
        listed by ``cf.get_preferred_stream_info``; any other value uses all
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string cell (presumably an empty/NaN stream
                        # slot -- confirm): nothing to match for this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        for fd in fdatasets:
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                save_dir = os.path.join(sDir, array, subsite, refdes, 'ts_plots')
                cf.create_dir(save_dir)

                tme = ds['time'].values
                t0 = pd.to_datetime(tme.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tme.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))
                filename = '-'.join(('_'.join(fname.split('_')[:-1]), 'ts', t0[:10]))

                ds_vars = list(ds.data_vars.keys())
                raw_vars = cf.return_raw_vars(ds_vars)

                xvar = return_var(ds, raw_vars, 'salinity', 'Practical Salinity')
                sal = ds[xvar].values
                sal_fv = ds[xvar]._FillValue

                yvar = return_var(ds, raw_vars, 'temp', 'Seawater Temperature')
                temp = ds[yvar].values
                temp_fv = ds[yvar]._FillValue

                # pressure is looked up in the coordinates first, then in the data variables
                press = pf.pressure_var(ds, list(ds.coords.keys()))
                if press is None:
                    press = pf.pressure_var(ds, list(ds.data_vars.keys()))
                p = ds[press].values

                # get rid of nans, 0.0s, fill values (salinity first, then temperature)
                sind1 = (~np.isnan(sal)) & (sal != 0.0) & (sal != sal_fv)
                sal, temp, tme, p = _mask_all(sind1, sal, temp, tme, p)
                tind1 = (~np.isnan(temp)) & (temp != 0.0) & (temp != temp_fv)
                sal, temp, tme, p = _mask_all(tind1, sal, temp, tme, p)

                # reject values outside global ranges (skipped when a bound is missing):
                global_min, global_max = cf.get_global_ranges(r, xvar)
                if global_min is not None and global_max is not None:
                    sgr_ind = cf.reject_global_ranges(sal, global_min, global_max)
                    sal, temp, tme, p = _mask_all(sgr_ind, sal, temp, tme, p)

                global_min, global_max = cf.get_global_ranges(r, yvar)
                if global_min is not None and global_max is not None:
                    tgr_ind = cf.reject_global_ranges(temp, global_min, global_max)
                    sal, temp, tme, p = _mask_all(tgr_ind, sal, temp, tme, p)

                # get rid of outliers
                soind = cf.reject_outliers(sal, 5)
                sal, temp, tme, p = _mask_all(soind, sal, temp, tme, p)

                toind = cf.reject_outliers(temp, 5)
                sal, temp, tme, p = _mask_all(toind, sal, temp, tme, p)

                if len(sal) > 0:  # if there are any data to plot

                    colors = cm.rainbow(np.linspace(0, 1, len(tme)))

                    # Figure out boundaries (mins and maxes); the padding is a
                    # fraction of the extreme value and depends on the data spread
                    if sal.max() - sal.min() < 0.2:
                        smin = sal.min() - (0.0005 * sal.min())
                        smax = sal.max() + (0.0005 * sal.max())
                    else:
                        smin = sal.min() - (0.001 * sal.min())
                        smax = sal.max() + (0.001 * sal.max())

                    if temp.max() - temp.min() <= 1:
                        tmin = temp.min() - (0.01 * temp.min())
                        tmax = temp.max() + (0.01 * temp.max())
                    elif 1 < temp.max() - temp.min() < 1.5:
                        tmin = temp.min() - (0.05 * temp.min())
                        tmax = temp.max() + (0.05 * temp.max())
                    else:
                        tmin = temp.min() - (0.1 * temp.min())
                        tmax = temp.max() + (0.1 * temp.max())

                    # Calculate how many gridcells are needed in the x and y directions and
                    # Create temp and sal vectors of appropriate dimensions
                    xdim = int(round((smax-smin)/0.1 + 1, 0))
                    if xdim == 1:
                        xdim = 2
                    si = np.linspace(0, xdim - 1, xdim) * 0.1 + smin

                    if 1.1 <= temp.max() - temp.min() < 1.7:  # if the diff between min and max temp is small
                        ydim = int(round((tmax-tmin)/0.75 + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) * 0.75 + tmin
                    elif temp.max() - temp.min() < 1.1:
                        ydim = int(round((tmax - tmin) / 0.1 + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) * 0.1 + tmin
                    else:
                        ydim = int(round((tmax - tmin) + 1, 0))
                        ti = np.linspace(0, ydim - 1, ydim) + tmin

                    # Create empty grid of zeros
                    mdens = np.zeros((ydim, xdim))

                    # Loop to fill in grid with densities, calculated using the
                    # median pressure value (computed once, it does not change per cell)
                    p_median = np.median(p)
                    for j in range(0, ydim):
                        for i in range(0, xdim):
                            mdens[j, i] = gsw.density.rho(si[i], ti[j], p_median)

                    fig, ax = pf.plot_ts(si, ti, mdens, sal, temp, colors)

                    ax.set_title((title + '\n' + t0 + ' - ' + t1 + '\ncolors = time (cooler: earlier)'), fontsize=9)
                    # number removed = original salinity count minus what survived all filters
                    leg_text = ('Removed {} values (SD=5)'.format(len(ds[xvar].values) - len(sal)),)
                    ax.legend(leg_text, loc='best', fontsize=6)
                    pf.save_fig(save_dir, filename)

Example #4

Show file

File: plot_timeseries_hpies.py Project: ooi-data-lab/data-review-tools

def main(sDir, ncdir, start_time, end_time):
    """Plot timeseries for every NetCDF file found under ``ncdir``.

    Each ``.nc`` file whose path does not contain ``'_blank'`` is opened, and
    every selected one-dimensional variable is plotted twice with
    ``pf.plot_timeseries``: once with all data (``stdev=None``) and once with
    ``stdev=5`` (saved with the ``_rmoutliers`` suffix).

    :param sDir: base directory under which the ``timeseries_plots`` folders
        are created
    :param ncdir: directory that is walked for ``.nc`` files; its
        second-to-last '/'-separated element is printed as the reference
        designator
    :param start_time: start of the time slice, or None (both bounds must be
        given for the slice to be applied)
    :param end_time: end of the time slice, or None
    """
    rd_list = [ncdir.split('/')[-2]]

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for root, dirs, files in os.walk(ncdir):
            for f in files:
                if f.endswith('.nc'):
                    # keep the path relative to ncdir so files found in
                    # sub-directories still resolve when joined with ncdir
                    datasets.append(
                        os.path.relpath(os.path.join(root, f), ncdir))

        for fd in datasets:
            if '_blank' in fd:
                continue
            nc_path = os.path.join(ncdir, fd)
            # the context manager closes the file on every exit path,
            # including the `continue` below
            with xr.open_dataset(nc_path, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                ds_vars = list(ds.data_vars.keys()) + [
                    x for x in ds.coords.keys() if 'pressure' in x
                ]  # get pressure variable from coordinates

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(
                    nc_path)
                # NOTE(review): this used to test the literal string
                # 'VEL3D in refdes', which is always true, so the else branch
                # could never run.  Confirm that raw variables are wanted for
                # all other instruments.
                if 'NUTNR' in refdes or 'VEL3D' in refdes:
                    plot_vars = cf.return_science_vars(stream)
                else:
                    plot_vars = cf.return_raw_vars(ds_vars)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes,
                                        'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                for var in plot_vars:
                    print(var)
                    if var in ['id', 'record_type', 'unique_id']:
                        continue
                    y = ds[var]
                    try:
                        fv = y._FillValue
                    except AttributeError:
                        fv = np.nan
                    if len(y.dims) != 1:
                        continue

                    # Check if the array is all NaNs
                    y[y == fv] = np.nan  # turn fill values to nans
                    if sum(np.isnan(y.values)) == len(y.values):
                        print(
                            'Array of all NaNs and/or fill values - skipping plot.'
                        )
                        continue

                    # reject fill values
                    ind = y.values != fv
                    t = tm[ind]
                    y = y[ind]

                    # Plot all data first, then the data with outliers removed
                    for stdev, suffix in ((None, ''), (5, '_rmoutliers')):
                        fig, ax = pf.plot_timeseries(t, y, y.name, stdev=stdev)
                        ax.set_title((title + '\n' + t0 + ' - ' + t1),
                                     fontsize=9)
                        sfile = '-'.join((filename, y.name, t0[:10])) + suffix
                        pf.save_fig(save_dir, sfile)

Example #5

Show file

File: plot_timeseries.py Project: leilabbb/data-review-tools

def main(sDir, url_list, start_time, end_time, preferred_only):
    """Plot timeseries for every selected dataset of each reference designator.

    Reference designators are parsed from the second-to-last path element of
    each url.  For each one the NetCDF urls are collected with
    ``cf.get_nc_urls``, optionally restricted to the preferred streams,
    de-duplicated and passed through ``cf.filter_collocated_instruments``.
    Every selected one-dimensional variable is plotted twice with
    ``pf.plot_timeseries``: once with all data (``stdev=None``) and once with
    ``stdev=5`` (saved with the ``_rmoutliers`` suffix).

    :param sDir: base directory under which the ``timeseries_plots`` folders
        are created
    :param url_list: list of urls whose second-to-last path element is a
        '-'-separated string containing the reference designator
    :param start_time: start of the time slice, or None (both bounds must be
        given for the slice to be applied)
    :param end_time: end of the time slice, or None
    :param preferred_only: the string 'yes' restricts the datasets to those
        listed by ``cf.get_preferred_stream_info``; any other value uses all
    """
    rd_list = []
    for uu in url_list:
        elements = uu.split('/')[-2].split('-')
        rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))
        if rd not in rd_list:
            rd_list.append(rd)

    for r in rd_list:
        print('\n{}'.format(r))
        datasets = []
        for u in url_list:
            splitter = u.split('/')[-2].split('-')
            rd_check = '-'.join((splitter[1], splitter[2], splitter[3], splitter[4]))
            if rd_check == r:
                udatasets = cf.get_nc_urls([u])
                datasets.append(udatasets)
        datasets = list(itertools.chain(*datasets))
        fdatasets = []
        if preferred_only == 'yes':
            # get the preferred stream information
            ps_df, n_streams = cf.get_preferred_stream_info(r)
            for _, row in ps_df.iterrows():
                for ii in range(n_streams):
                    try:
                        rms = '-'.join((r, row[ii]))
                    except TypeError:
                        # non-string cell: nothing to match for this column
                        continue
                    for dd in datasets:
                        spl = dd.split('/')[-2].split('-')
                        catalog_rms = '-'.join((spl[1], spl[2], spl[3], spl[4], spl[5], spl[6]))
                        fdeploy = dd.split('/')[-1].split('_')[0]
                        if rms == catalog_rms and fdeploy == row['deployment']:
                            fdatasets.append(dd)
        else:
            fdatasets = datasets

        fdatasets = np.unique(fdatasets).tolist()
        main_sensor = r.split('-')[-1]
        fdatasets = cf.filter_collocated_instruments(main_sensor, fdatasets)

        for fd in fdatasets:
            if '_blank' in fd:
                continue
            # the context manager closes the file on every exit path,
            # including the `continue` below
            with xr.open_dataset(fd, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})
                ds_vars = list(ds.data_vars.keys()) + [x for x in ds.coords.keys() if 'pressure' in x]  # get pressure variable from coordinates

                if start_time is not None and end_time is not None:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print('No data to plot for specified time range: ({} to {})'.format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(fd)
                if 'NUTNR' in refdes:
                    plot_vars = cf.return_science_vars(stream)
                else:
                    plot_vars = cf.return_raw_vars(ds_vars)
                print('\nPlotting {} {}'.format(r, deployment))
                array = subsite[0:2]
                filename = '_'.join(fname.split('_')[:-1])
                save_dir = os.path.join(sDir, array, subsite, refdes, 'timeseries_plots', deployment)
                cf.create_dir(save_dir)

                tm = ds['time'].values
                t0 = pd.to_datetime(tm.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(tm.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                for var in plot_vars:
                    print(var)
                    if var == 'id':
                        continue
                    y = ds[var]
                    try:
                        fv = y._FillValue
                    except AttributeError:
                        fv = np.nan
                    if len(y.dims) != 1:
                        continue

                    # Check if the array is all NaNs
                    if sum(np.isnan(y.values)) == len(y.values):
                        print('Array of all NaNs - skipping plot.')

                    # Check if the array is all fill values
                    elif len(y[y != fv]) == 0:
                        print('Array of all fill values - skipping plot.')

                    else:
                        # reject fill values
                        ind = y.values != fv
                        t = tm[ind]
                        y = y[ind]

                        # Plot all data first, then the data with outliers removed
                        for stdev, suffix in ((None, ''), (5, '_rmoutliers')):
                            fig, ax = pf.plot_timeseries(t, y, y.name, stdev=stdev)
                            ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                            sfile = '-'.join((filename, y.name, t0[:10])) + suffix
                            pf.save_fig(save_dir, sfile)

Example #6

Show file

File: plot_profiles.py Project: ooi-data-lab/data-review-tools

def main(sDir, f, start_time, end_time):
    """Create profile plots for the datasets listed in a csv file.

    The csv file ``f`` (located in ``sDir``) must have an ``outputUrl``
    column.  For every url the NetCDF files are collected with
    ``cf.get_nc_urls`` and passed through
    ``cf.filter_collocated_instruments``.  Each raw variable other than the
    pressure variable is plotted against pressure twice with
    ``pf.plot_profiles``: once with ``stdev=None`` and once with ``stdev=5``
    (saved with the ``rmoutliers`` suffix).

    :param sDir: directory holding the csv file; also the base directory for
        the ``profile_plots`` output folders
    :param f: csv file name
    :param start_time: start of the time slice, or None (both bounds must be
        given for the slice to be applied)
    :param end_time: end of the time slice, or None
    """
    url_table = pd.read_csv(os.path.join(sDir, f))
    url_list = url_table['outputUrl'].tolist()
    for url_number, u in enumerate(url_list, start=1):
        print('\nUrl {} of {}: {}'.format(url_number, len(url_list), u))
        main_sensor = u.split('/')[-2].split('-')[4]
        datasets = cf.get_nc_urls([u])
        datasets_sel = cf.filter_collocated_instruments(main_sensor, datasets)

        for dataset_number, d in enumerate(datasets_sel, start=1):
            print('\nDataset {} of {}: {}'.format(dataset_number,
                                                  len(datasets_sel), d))
            with xr.open_dataset(d, mask_and_scale=False) as ds:
                ds = ds.swap_dims({'obs': 'time'})

                time_window_given = start_time is not None and end_time is not None
                if time_window_given:
                    ds = ds.sel(time=slice(start_time, end_time))
                    if len(ds['time'].values) == 0:
                        print(
                            'No data to plot for specified time range: ({} to {})'
                            .format(start_time, end_time))
                        continue

                fname, subsite, refdes, method, stream, deployment = cf.nc_attributes(d)
                data_var_names = ds.data_vars.keys()

                # for glider CTDs, pressure is a coordinate
                if 'MOAS' in subsite and 'CTD' in main_sensor:
                    pressure = 'sci_water_pressure_dbar'
                else:
                    pressure = pf.pressure_var(ds, data_var_names)

                # remove pressure from the variables to plot
                raw_vars = [
                    name for name in cf.return_raw_vars(data_var_names)
                    if name not in [pressure]
                ]

                save_dir = os.path.join(sDir, subsite, refdes, 'profile_plots',
                                        deployment)
                cf.create_dir(save_dir)

                t = ds['time'].values
                t0 = pd.to_datetime(t.min()).strftime('%Y-%m-%dT%H:%M:%S')
                t1 = pd.to_datetime(t.max()).strftime('%Y-%m-%dT%H:%M:%S')
                title = ' '.join((deployment, refdes, method))

                colors = cm.rainbow(np.linspace(0, 1, len(t)))

                y = ds[pressure]

                print('Plotting variables...')
                for var in raw_vars:
                    print(var)
                    x = ds[var]

                    xlabel = var + " (" + x.units + ")"
                    ylabel = pressure + " (" + y.units + ")"

                    # first pass: all data; second pass: outliers removed
                    for stdev, name_suffix in ((None, ()), (5, ('rmoutliers',))):
                        fig, ax = pf.plot_profiles(x, y, colors, ylabel, xlabel,
                                                   stdev=stdev)
                        ax.set_title((title + '\n' + t0 + ' - ' + t1), fontsize=9)
                        sfile = '_'.join((fname[0:-46], x.name) + name_suffix)
                        pf.save_fig(save_dir, sfile)