Example #1
def active_drifters(bbox=None, time_start=None, time_end=None):
    bbox = bbox or [-100, -40, 18, 60]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')

    e = ERDDAP(server='OSMC', protocol="tabledap")
    e.dataset_id = "gdp_interpolated_drifter"

    # Setting constraints
    e.constraints = {
        "time>=": t0,
        "time<=": t1,
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    # e.variables = [
    #     "WMO",
    #     "latitude",
    #     "longitude",
    #     "time",
    # ]

    try:
        df = e.to_pandas()
    except ValueError:
        return pd.DataFrame()

    return df
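A minimal usage sketch (not part of the original snippet); the bounding box is illustrative and follows the [lon_min, lon_max, lat_min, lat_max] order used above:

# hypothetical call: drifters reported over the last day inside a custom box
recent_drifters = active_drifters(bbox=[-75, -70, 35, 42])
print(len(recent_drifters))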
Example #2
    def load_data(self, year='2019'):
        self.dfs = {}
        for index, row in self.df.iterrows():
            if (self.glider_id
                    in row['Dataset ID']) and (year in row['Dataset ID']):
                print(row['Dataset ID'])

                try:
                    e = ERDDAP(
                        server=self.server_url,
                        protocol='tabledap',
                        response='csv',
                    )
                    e.dataset_id = row['Dataset ID']
                    e.constraints = self.constraints
                    e.variables = self.variables[row['Dataset ID']]
                except HTTPError:
                    print('Failed to generate url {}'.format(
                        row['Dataset ID']))
                    continue
                self.dfs.update({
                    row['Dataset ID']:
                    e.to_pandas(
                        index_col='time (UTC)',
                        parse_dates=True,
                        skiprows=(1, )  # units information can be dropped.
                    )
                })

        return (self.dfs)
Example #3
def active_argo_floats(bbox=None, time_start=None, time_end=None, floats=None):
    """

    :param bbox: [westernmost longitude, easternmost longitude, southernmost latitude, northernmost latitude]
    :param time_start: time to start looking for floats
    :param time_end: time to end looking for floats
    :param floats: optional platform_number(s) to restrict the search to
    :return: pandas DataFrame of active Argo float profiles
    """

    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    floats = floats or False

    constraints = {
        'time>=': str(time_start),
        'time<=': str(time_end),
    }

    if bbox:
        constraints['longitude>='] = bbox[0]
        constraints['longitude<='] = bbox[1]
        constraints['latitude>='] = bbox[2]
        constraints['latitude<='] = bbox[3]

    if floats:
        constraints['platform_number='] = floats

    variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    e = ERDDAP(
        server='IFREMER',
        protocol='tabledap',
        response='nc'
    )

    e.dataset_id = 'ArgoFloats'
    e.constraints = constraints
    e.variables = variables

    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
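A usage sketch with illustrative values (not from the source); `dt` is the `datetime` module imported in the original file:

# hypothetical call: Argo profiles from the last two days in a Gulf Stream box
argo_df = active_argo_floats(
    bbox=[-75, -60, 30, 42],
    time_start=dt.date.today() - dt.timedelta(days=2),
)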
Example #4
def get_erddap_dataset(server, ds_id, variables=None, constraints=None):
    variables = variables or None
    constraints = constraints or None

    e = ERDDAP(server=server,
               protocol='tabledap',
               response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables
    ds = e.to_xarray()
    ds = ds.sortby(ds.time)
    return ds
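A usage sketch; the server URL, dataset ID, and time window are placeholders rather than values from the original snippet:

ds = get_erddap_dataset(
    server='https://data.ioos.us/gliders/erddap',   # placeholder server
    ds_id='ng231-20190901T0000',                    # placeholder dataset ID
    constraints={'time>=': '2019-09-01T00:00:00Z', 'time<=': '2019-09-02T00:00:00Z'},
)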
Example #5
def check_dataset_empty(url_erddap, dataset_id, date_ini, date_end, lon_lim, lat_lim):

    from erddapy import ERDDAP

    constraints = {
        'time>=': date_ini,
        'time<=': date_end,
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
        }

    variable_names = [
            'depth',
            'latitude',
            'longitude',
            'time',
            'temperature',
            'salinity'
            ]

    e = ERDDAP(
            server=url_erddap,
            protocol='tabledap',
            response='nc'
            )

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variable_names

    # Converting glider data to data frame
    # Checking that data frame has data
    df = e.to_pandas()
    if len(df) < 4:
        empty_dataset = True
    else:
        empty_dataset = False

    return empty_dataset
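A usage sketch with placeholder arguments; note that `lon_lim` comes before `lat_lim` in the signature:

empty = check_dataset_empty(
    'https://data.ioos.us/gliders/erddap',        # placeholder server
    'ng231-20190901T0000',                        # placeholder dataset ID
    '2019-09-01T00:00:00Z', '2019-09-10T00:00:00Z',
    [-75.0, -72.0],                               # lon_lim
    [38.0, 40.0],                                 # lat_lim
)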
Example #6
def get_erddap_dataset(ds_id, variables=None, constraints=None, filetype=None):
    """
    Returns a netcdf dataset for a specified dataset ID (or dataframe if dataset cannot be converted to xarray)
    :param ds_id: dataset ID e.g. ng314-20200806T2040
    :param variables: optional list of variables
    :param constraints: optional dict of constraints
    :param filetype: optional filetype to return, 'nc' (default) or 'dataframe'
    :return: netcdf dataset
    """
    variables = variables or None
    constraints = constraints or None
    filetype = filetype or 'nc'

    e = ERDDAP(server='NGDAC', protocol='tabledap', response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables
    if filetype == 'nc':
        try:
            ds = e.to_xarray()
            ds = ds.sortby(ds.time)
        except OSError:
            print('No dataset available for specified constraints: {}'.format(
                ds_id))
            ds = []
        except TypeError:
            print('Cannot convert to xarray, providing dataframe: {}'.format(
                ds_id))
            ds = e.to_pandas().dropna()
    elif filetype == 'dataframe':
        ds = e.to_pandas().dropna()
    else:
        print('Unrecognized filetype: {}. Needs to be "nc" or "dataframe"'.format(filetype))
        ds = []

    return ds
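A usage sketch; the dataset ID comes from the docstring example, while the variable list is an assumption:

ds = get_erddap_dataset(
    'ng314-20200806T2040',
    variables=['time', 'latitude', 'longitude', 'depth', 'temperature'],
    filetype='nc',
)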
Example #7
    def load_data(self,year='2019'):
        self.dfs = {}
        for index,row in self.df.iterrows():
            if (self.glider_id in row['Dataset ID']) and (year in row['Dataset ID']):
                print(row['Dataset ID'])

                try:
                    e = ERDDAP(server=self.server_url,
                        protocol='tabledap',
                        response='csv',
                    )
                    e.dataset_id=row['Dataset ID']
                    e.constraints=self.constraints
                    e.variables=self.variables[row['Dataset ID']]
                except HTTPError:
                    print('Failed to generate url {}'.format(row['Dataset ID']))
                    continue
                self.dfs.update({row['Dataset ID']: e.to_pandas(
                                        index_col='time (UTC)',
                                        parse_dates=True,
                                        skiprows=(1,)  # units information can be dropped.
                                        )})  
                
        return(self.dfs)
Example #8
igrid = 1

#%% Reading bathymetry data

ncbath = xr.open_dataset(bath_file)
bath_lat = ncbath.variables['lat'][:]
bath_lon = ncbath.variables['lon'][:]
bath_elev = ncbath.variables['elevation'][:]

#%% Looping through all gliders found

for id in gliders:
    print('Reading ' + id )
    e.dataset_id = id
    e.constraints = constraints
    e.variables = variables
    
    # checking data frame is not empty
    df = e.to_pandas()
    if len(df.index) != 0:
    
        # Converting glider data to data frame
        df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
                ).dropna()

        # Converting glider vectors into arrays
        timeg, ind = np.unique(df.index.values, return_index=True)
Example #9
def read_glider_variables_erddap_server(url_erddap, dataset_id,
                                        lat_lim, lon_lim,
                                        variable_names=['time'],
                                        **kwargs):
    """
    Created on Tue Nov  3 11:26:05 2020

    @author: aristizabal

    This function reads glider variables from the IOOS
    and Rutgers ERDDAP glider servers.

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'
    variable_names: list of variable names.
                    Example:
                            variable_names = ['depth',
                                            'latitude',
                                            'longitude',
                                            'time',
                                            'temperature',
                                            'salinity']
                    The default value is variable_names=['time']
    lat_lim: latitude limits for the search.
            Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
            Example, lon_lim = [-75.0,-72.0]
    date_ini: initial date of the time window.
        This function accepts the date formats '%Y-%m-%dT%H:%M:%SZ' and '%Y/%m/%d/%H'.
        Example: date_ini = '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end: final date of the time window.
        This function accepts the date formats '%Y-%m-%dT%H:%M:%SZ' and '%Y/%m/%d/%H'.
        Example: date_end = '2018-08-10T00:00:00Z' or '2018/08/10/00'

    Outputs:
    df: Pandas data frame with all the variables requested as vectors

    """

    from erddapy import ERDDAP
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    if date_ini is None or date_end is None:
        constraints = {
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }
    else:
        constraints = {
            'time>=': date_ini,
            'time<=': date_end,
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variable_names

    # Converting glider data to data frame
    # Checking that data frame has data
    df = e.to_pandas()
    if len(df) > 3:

        df = e.to_pandas(parse_dates=True)

    return df
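A usage sketch assembled from the docstring's own example values (the dates are illustrative):

df = read_glider_variables_erddap_server(
    'https://data.ioos.us/gliders/erddap', 'ng231-20190901T0000',
    lat_lim=[38.0, 40.0], lon_lim=[-75.0, -72.0],
    variable_names=['depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'],
    date_ini='2019-09-01T00:00:00Z', date_end='2019-09-10T00:00:00Z',
)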
Example #10
def read_glider_data_erddap_server(url_erddap, dataset_id,
                                   lat_lim, lon_lim, scatter_plot, **kwargs):
    """
    Created on Tue Feb  5 10:05:37 2019

    @author: aristizabal

    This function reads glider data from the IOOS
    Data Assembly Center (DAC).

    Inputs:
    url_erddap: url address of the ERDDAP server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: this id is retrieved from the glider DAC using the
               function "retrieve_glider_id_erddap_server".
               Example: 'ru30-20180705T1825'
    lat_lim: latitude limits for the search.
            Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
            Example, lon_lim = [-75.0,-72.0]
    date_ini: initial date of the time window.
        This function accepts the date formats '%Y-%m-%dT%H:%M:%SZ' and '%Y/%m/%d/%H'.
        Example: date_ini = '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end: final date of the time window.
        This function accepts the date formats '%Y-%m-%dT%H:%M:%SZ' and '%Y/%m/%d/%H'.
        Example: date_end = '2018-08-10T00:00:00Z' or '2018/08/10/00'
    scatter_plot: if equal to 'yes' then a scatter plot
            of the glider transect is plotted

    Outputs:
    tempg: all the glider profiles of temperature within the user defined time window
    saltg: all the glider profiles of salinity within the user defined time window
    latg: latitude within the user defined time window
    long: longitude within the user defined time window
    timeg: user defined time window
    depthg: depth vector for all profiles
    """

    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    if date_ini is None or date_end is None:
        constraints = {
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }
    else:
        constraints = {
            'time>=': date_ini,
            'time<=': date_end,
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Converting glider data to data frame
    # Checking that data frame has data
    df = e.to_pandas()
    if len(df) > 3:

        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        ).dropna()

        # Converting glider vectors into arrays
        timeg, ind = np.unique(df.index.values, return_index=True)
        latg = df['latitude (degrees_north)'].values[ind]
        long = df['longitude (degrees_east)'].values[ind]

        dg = df['depth (m)'].values
        vg1 = df[df.columns[3]].values
        vg2 = df[df.columns[4]].values

        zn = int(np.max(np.diff(np.hstack([ind, len(dg)]))))

        depthg = np.empty((zn, len(timeg)))
        depthg[:] = np.nan
        tempg = np.empty((zn, len(timeg)))
        tempg[:] = np.nan
        saltg = np.empty((zn, len(timeg)))
        saltg[:] = np.nan

        for i, ii in enumerate(ind):
            if i < len(timeg) - 1:
                depthg[0:len(dg[ind[i]:ind[i + 1]]), i] = dg[ind[i]:ind[i + 1]]
                tempg[0:len(vg1[ind[i]:ind[i + 1]]),
                      i] = vg1[ind[i]:ind[i + 1]]
                saltg[0:len(vg2[ind[i]:ind[i + 1]]),
                      i] = vg2[ind[i]:ind[i + 1]]
            else:
                depthg[0:len(dg[ind[i]:len(dg)]), i] = dg[ind[i]:len(dg)]
                tempg[0:len(vg1[ind[i]:len(vg1)]), i] = vg1[ind[i]:len(vg1)]
                saltg[0:len(vg2[ind[i]:len(vg2)]), i] = vg2[ind[i]:len(vg2)]

        # Scatter plot
        if scatter_plot == 'yes':

            color_map = cmocean.cm.thermal
            varg = tempg
            timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
            ttg = np.ravel(timeg_matrix)
            dg = np.ravel(depthg)
            teg = np.ravel(varg)

            kw = dict(c=teg, marker='*', edgecolor='none')

            fig, ax = plt.subplots(figsize=(10, 3))
            cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
            #fig.colorbar(cs)
            ax.set_xlim(timeg[0], timeg[-1])

            ax.set_ylabel('Depth (m)', fontsize=14)
            cbar = plt.colorbar(cs)
            cbar.ax.set_ylabel('Temperature ($^oC$)', fontsize=14)
            ax.set_title(dataset_id, fontsize=16)
            xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
            ax.xaxis.set_major_formatter(xfmt)
            plt.ylim([-np.nanmax(dg), 0])

            color_map = cmocean.cm.haline
            varg = saltg
            timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
            ttg = np.ravel(timeg_matrix)
            dg = np.ravel(depthg)
            teg = np.ravel(varg)

            kw = dict(c=teg, marker='*', edgecolor='none')

            fig, ax = plt.subplots(figsize=(10, 3))
            cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
            #fig.colorbar(cs)
            ax.set_xlim(timeg[0], timeg[-1])

            ax.set_ylabel('Depth (m)', fontsize=14)
            cbar = plt.colorbar(cs)
            cbar.ax.set_ylabel('Salinity', fontsize=14)
            ax.set_title(dataset_id, fontsize=16)
            xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
            ax.xaxis.set_major_formatter(xfmt)
            plt.ylim([-np.nanmax(dg), 0])

    else:
        tempg = np.nan
        saltg = np.nan
        timeg = np.nan
        latg = np.nan
        long = np.nan
        depthg = np.nan

    return tempg, saltg, timeg, latg, long, depthg
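A usage sketch with the docstring's example dataset ID and limits; the dates and the scatter_plot flag are illustrative:

tempg, saltg, timeg, latg, long, depthg = read_glider_data_erddap_server(
    'https://data.ioos.us/gliders/erddap', 'ru30-20180705T1825',
    [38.0, 40.0], [-75.0, -72.0], scatter_plot='no',
    date_ini='2018-08-02T00:00:00Z', date_end='2018-08-10T00:00:00Z',
)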
Example #11
def get_ndbc(bbox=None, time_start=None, time_end=None, buoy=None):
    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    buoy = buoy or False
    time_formatter = '%Y-%m-%dT%H:%M:%SZ'

    e = ERDDAP(
        server='CSWC',
        protocol='tabledap',
        response='csv'
    )

    e.dataset_id = 'cwwcNDBCMet'
    e.constraints = {
        'time>=': time_start.strftime(time_formatter),
        'time<=': time_end.strftime(time_formatter),
    }

    if bbox:
        e.constraints['longitude>='] = bbox[0]
        e.constraints['longitude<='] = bbox[1]
        e.constraints['latitude>='] = bbox[2]
        e.constraints['latitude<='] = bbox[3]

    e.variables = [
        "station",
        "latitude",
        "longitude",
        "time"
    ]

    if buoy:
        e.constraints['station='] = buoy

    df = e.to_pandas(
        parse_dates=['time (UTC)'],
        skiprows=(1,)  # units information can be dropped.
    ).dropna()

    stations = df.station.unique()

    # e.variables = [
    #     "station",
    #     "latitude",
    #     "longitude",
    #     "wd",
    #     "wspd",
    #     "gst",
    #     "wvht",
    #     "dpd",
    #     "apd",
    #     "mwd",
    #     "bar",
    #     "atmp",
    #     "wtmp",
    #     "dewp",
    #     # "vis",
    #     # "ptdy",
    #     # "tide",
    #     "wspu",
    #     "wspv",
    #     "time",
    # ]

    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
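A usage sketch (values are placeholders); a single station can also be requested through the `buoy` argument:

# hypothetical call: NDBC stations reporting over the last day inside a box
met_df = get_ndbc(bbox=[-75, -70, 35, 42])

# or restrict the query to one station ID (placeholder)
buoy_df = get_ndbc(buoy='44025')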
Example #12
def active_gliders(bbox=None, time_start=None, time_end=None, glider_id=None):
    bbox = bbox or [-100, -40, 18, 60]
    time_end = time_end or dt.date.today()  # avoid evaluating today() at definition time
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')
    glider_id = glider_id or None

    e = ERDDAP(server='NGDAC')

    # Grab every dataset available
    # datasets = pd.read_csv(e.get_search_url(response='csv', search_for='all'))

    # Search constraints
    kw = dict()
    kw['min_time'] = t0
    kw['max_time'] = t1

    if bbox:
        kw['min_lon'] = bbox[0]
        kw['max_lon'] = bbox[1]
        kw['min_lat'] = bbox[2]
        kw['max_lat'] = bbox[3]

    if glider_id:
        search = glider_id
    else:
        search = None

    search_url = e.get_search_url(search_for=search, response='csv', **kw)

    try:
        # Grab the results
        search = pd.read_csv(search_url)
    except:
        # return empty dataframe if there are no results
        return pd.DataFrame()

    # Extract the IDs
    gliders = search['Dataset ID'].values

    msg = 'Found {} Glider Datasets:\n\n{}'.format
    print(msg(len(gliders), '\n'.join(gliders)))

    # Setting constraints
    constraints = {
            'time>=': t0,
            'time<=': t1,
            'longitude>=': bbox[0],
            'longitude<=': bbox[1],
            'latitude>=': bbox[2],
            'latitude<=': bbox[3],
            }

    variables = [
            'depth',
            'latitude',
            'longitude',
            'time',
            'temperature',
            'salinity',
            ]

    e = ERDDAP(
            server='NGDAC',
            protocol='tabledap',
            response='nc'
    )

    glider_dfs = []

    for id in gliders:
        # print('Reading ' + id)
        e.dataset_id = id
        e.constraints = constraints
        e.variables = variables

        # checking data frame is not empty
        try:
            df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            ).dropna()
        except:
            continue
        df = df.reset_index()
        df['dataset_id'] = id
        df = df.set_index(['dataset_id', 'time (UTC)'])
        glider_dfs.append(df)

    try:
        ndf = pd.concat(glider_dfs)
    except ValueError:
        return pd.DataFrame()

    return ndf
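A usage sketch; with no arguments the function searches the default box over the last day, and `glider_id` narrows the dataset search:

# hypothetical calls: full search, then a single deployment (ID prefix is a placeholder)
gliders_df = active_gliders()
one_glider_df = active_gliders(glider_id='ru30')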
Example #13
def read_glider_data_erddap_Rutgers_server(url_erddap, dataset_id,
                                           lat_lim, lon_lim, scatter_plot, **kwargs):

    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    if date_ini is None or date_end is None:
        constraints = {
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }
    else:
        constraints = {
            'time>=': date_ini,
            'time<=': date_end,
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Converting glider data to data frame
    # Checking that data frame has data
    df = e.to_pandas()
    if len(df) != 0:

        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        ).dropna()

    # latitude and longitude vectors (returned below)
    latg = df['latitude (degrees_north)'].values
    long = df['longitude (degrees_east)'].values

    dg = df['depth (m)'].values
    tg = df.index.values
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    upcast = np.where(np.diff(dg) < 0)[0]
    oku = np.where(np.diff(upcast) > 1)[0]
    end_upcast = upcast[oku]

    downcast = np.where(np.diff(dg) > 0)[0]
    okd = np.where(np.diff(downcast) > 1)[0]
    end_downcast = downcast[okd]

    ind = np.hstack(
        [0, np.unique(np.hstack([end_upcast, end_downcast])),
         len(dg)])
    zn = np.max(np.diff(ind))

    depthg = np.empty((zn, len(ind)))
    depthg[:] = np.nan
    timeg = np.empty((zn, len(ind)))
    timeg[:] = np.nan
    tempg = np.empty((zn, len(ind)))
    tempg[:] = np.nan
    saltg = np.empty((zn, len(ind)))
    saltg[:] = np.nan

    for i in np.arange(len(ind)):
        if i == 0:
            indd = np.argsort(dg[ind[i]:ind[i + 1] + 2])
            depthg[0:len(dg[ind[i]:ind[i + 1] + 2]),
                   i] = dg[ind[i]:ind[i + 1] + 2][indd]
            timeg[0:len(dg[ind[i]:ind[i + 1] + 2]),
                  i] = mdates.date2num(tg[ind[i]:ind[i + 1] + 2][indd])
            tempg[0:len(vg1[ind[i]:ind[i + 1] + 2]),
                  i] = vg1[ind[i]:ind[i + 1] + 2][indd]
            saltg[0:len(vg2[ind[i]:ind[i + 1] + 2]),
                  i] = vg2[ind[i]:ind[i + 1] + 2][indd]
        if i < len(ind) - 1:
            indd = np.argsort(dg[ind[i] + 1:ind[i + 1] + 2])
            depthg[0:len(dg[ind[i] + 1:ind[i + 1] + 2]),
                   i] = dg[ind[i] + 1:ind[i + 1] + 2][indd]
            timeg[0:len(dg[ind[i] + 1:ind[i + 1] + 2]),
                  i] = mdates.date2num(tg[ind[i] + 1:ind[i + 1] + 2][indd])
            tempg[0:len(vg1[ind[i] + 1:ind[i + 1] + 2]),
                  i] = vg1[ind[i] + 1:ind[i + 1] + 2][indd]
            saltg[0:len(vg2[ind[i] + 1:ind[i + 1] + 2]),
                  i] = vg2[ind[i] + 1:ind[i + 1] + 2][indd]
        else:
            indd = np.argsort(dg[ind[i] + 1:len(dg)])
            depthg[0:len(dg[ind[i] + 1:len(dg)]),
                   i] = dg[ind[i] + 1:len(dg)][indd]
            timeg[0:len(dg[ind[i] + 1:len(dg)]),
                  i] = mdates.date2num(tg[ind[i] + 1:len(dg)][indd])
            tempg[0:len(vg1[ind[i] + 1:len(vg1)]),
                  i] = vg1[ind[i] + 1:len(vg1)][indd]
            saltg[0:len(vg2[ind[i] + 1:len(vg2)]),
                  i] = vg2[ind[i] + 1:len(vg2)][indd]

    # Scatter plot
    if scatter_plot == 'yes':

        color_map = cmocean.cm.thermal
        varg = tempg
        #timeg_matrix = np.tile(timeg.T,(depthg.shape[0],1))
        ttg = np.ravel(timeg)
        dg = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
        #fig.colorbar(cs)
        ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))

        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel('Temperature ($^oC$)', fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(dg), 0])

        color_map = cmocean.cm.haline
        varg = saltg
        #timeg_matrix = np.tile(timeg.T,(depthg.shape[0],1))
        ttg = np.ravel(timeg)
        dg = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
        #fig.colorbar(cs)
        ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))

        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel('Salinity', fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(dg), 0])

    return tempg, saltg, timeg, latg, long, depthg
Example #14
#https://coastwatch.pfeg.noaa.gov/erddap/info/wocecpr/index.html
### initializing the erddap class instance with the data server address and the connection protocol.
e = ERDDAP(
    server='https://coastwatch.pfeg.noaa.gov/erddap',
    protocol='tabledap',
)
### specifying the data format of the response.
e.response = 'csv'
### specifying the dataset ID we need the data from.
e.dataset_id = 'wocecpr'
### specifying the data constraints
e.constraints = {
    'time>=': '2000-01-15T01:24:00Z',
    'time<=': '2010-01-17T13:39:00Z',
    'latitude>=': 37.0,
    'latitude<=': 43.43,
    'longitude>=': 317.56,
    'longitude<=': 322.87,
}
### specifying the variables (column names) to be retrieved.
e.variables = [
    'sample',
    'latitude',
    'longitude',
    'life_stage',
    'abundance',
    'time',
]
### building the search URL for this server.
search_url = e.get_search_url(response='csv')
### receiving the requested data and saving it into a dataframe
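### (the snippet is cut off here; a plausible completion, not from the source, is:)
df = e.to_pandas()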
Example #15
import matplotlib.pyplot as plt
from erddapy import ERDDAP
import pandas as pd
import seaborn as sns
sns.set(rc={'figure.figsize': (11, 4)})


e = ERDDAP(
    server='https://erddap.marine.ie/erddap',
    protocol='tabledap',
)

e.dataset_id = 'IWBNetwork'

e.constraints = {
    'time>=': '2015-06-28T00:00:00Z',
    'station_id=': 'M3'
}

e.variables = [
    'time',
    'AtmosphericPressure',
    'WindDirection',
    'WindSpeed',
    'WaveHeight',
    'WavePeriod',
    'MeanWaveDirection',
    # 'Hmax',
    # 'AirTemperature',
    'SeaTemperature'
]
Example #16
from pathlib import Path

import pandas as pd
from erddapy import ERDDAP


path = Path().absolute()
fname = path.joinpath("data", "water_level_example.csv")

if fname.is_file():
    data = pd.read_csv(fname, parse_dates=["time (UTC)"])
else:
    e = ERDDAP(
        server="http://erddap.aoos.org/erddap/",
        protocol="tabledap"
    )
    e.dataset_id = "kotzebue-alaska-water-level"
    e.constraints = {
        "time>=": "2018-09-05T21:00:00Z",
        "time<=": "2019-07-10T19:00:00Z",
    }
    e.variables = [
        variable_name,
        "time",
        "z",
    ]
    data = e.to_pandas(
        index_col="time (UTC)",
        parse_dates=True,
    )
    data["timestamp"] = data.index.astype("int64") // 1e9
    data.to_csv(fname)

data.head()
Example #17
    )

    e.response = 'csv'
    #e.dataset_id = drifter_year + '_Argos_Drifters_NRT'
    #use this until we can get location quality back into older years
    #currently it is only in erddap for 2020 and newer
    #if int(drifter_years[0]) >= 2020:
    e.variables = [
        'trajectory_id', 'strain', 'voltage', 'time', 'latitude', 'sst',
        'longitude', 'location_quality'
    ]
    #else:
    #    e.variables = ['trajectory_id','strain', 'voltage', 'time', 'latitude', 'sst',
    #                   'longitude']

    e.constraints = {'trajectory_id=': argos_id}
    df_years = {}
    for year in drifter_years:
        e.dataset_id = year + '_Argos_Drifters_NRT'
        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        )
        df.columns = [x[1].split()[0] for x in enumerate(df.columns)]
        df_years[year] = df
    df = pd.concat(df_years.values())
    #get rid of timezone info
    df = df.tz_localize(None)
    # # names = ['trajectory_id','strain','voltage','datetime','latitude','sst','longitude']
    # # df=pd.read_csv(filename, skiprows=1, header=0, names=names, parse_dates=[3])
Example #18
def load_data_from_erddap(config, station_id=None, station_data=None):
    mcf_template = yaml.load(open(config['static_data']['mcf_template'], 'r'),
                             Loader=yaml.FullLoader)

    es = ERDDAP(
        server=config['dynamic_data']['erddap_server'],
        protocol=config['dynamic_data']['erddap_protocol'],
    )

    if station_id is None:
        #load all station data MCF skeleton
        stations = {}
        es.dataset_id = 'allDatasets'

        # filter out "log in" datasets, as the vast majority of their available metadata is unavailable
        es.constraints = {'accessible=': 'public'}
        stations_df = es.to_pandas()

        # drop 'allDatasets' row
        stations_df.drop(labels=0, axis='index', inplace=True)
        print(stations_df)

        for index_label, row_series in stations_df.iterrows():
            id = row_series['datasetID']

            # ensure each station has an independent copy of the MCF skeleton
            stations[id] = copy.deepcopy(mcf_template)
            dataset_url = row_series['tabledap'] if row_series[
                'dataStructure'] == 'table' else row_series['griddap']

            stations[id]['metadata']['identifier'] = id
            stations[id]['metadata']['dataseturi'] = dataset_url

            stations[id]['spatial']['datatype'] = 'textTable' if row_series[
                'dataStructure'] == 'table' else 'grid'

            stations[id]['spatial']['geomtype'] = row_series['cdm_data_type']
            stations[id]['spatial']['bbox'] = '%s,%s,%s,%s' % (
                row_series['minLongitude (degrees_east)'],
                row_series['minLatitude (degrees_north)'],
                row_series['maxLongitude (degrees_east)'],
                row_series['maxLatitude (degrees_north)'])

            stations[id]['identification']['title'] = row_series['title']
            stations[id]['identification']['dates']['creation'] = row_series[
                'minTime (UTC)']
            stations[id]['identification']['temporal_begin'] = row_series[
                'minTime (UTC)']
            stations[id]['identification']['temporal_end'] = row_series[
                'maxTime (UTC)']
            stations[id]['identification']['url'] = dataset_url
            stations[id]['identification']['abstract'] = row_series['summary']

            stations[id]['distribution']['erddap']['url'] = dataset_url
            stations[id]['distribution']['erddap']['name'] = row_series[
                'title']

        print('Stations after ERDDAP call...')
        print(stations)

        return_value = stations
        pass

    else:
        #load specific station data into MCF skeleton
        print('Loading ERDDAP metadata for station: %s' % (station_id))

        es.dataset_id = station_id

        metadata_url = es.get_download_url(dataset_id='%s/index' %
                                           (station_id),
                                           response='csv',
                                           protocol='info')
        metadata = pd.read_csv(filepath_or_buffer=metadata_url)
        print(metadata_url)
        print(metadata.head())

        # ERDDAP ISO XML provides a list of dataset field names (long & short), data types & units
        # of measurement, in case this becomes useful for the CIOOS metadata standard we can extend
        # the YAML skeleton to include these and the template to export them.
        #
        # below, most variable attributes from ERDDAP are extracted and pivoted to describe the field;
        # actual field data types are extracted separately and merged into the pivoted dataframe
        # for completeness
        columns_pivot = metadata[(metadata['Variable Name'] != 'NC_GLOBAL')
                                 & (metadata['Row Type'] != 'variable')].pivot(
                                     index='Variable Name',
                                     columns='Attribute Name',
                                     values='Value')
        col_data_types = metadata[(metadata['Row Type'] == 'variable')][[
            'Variable Name', 'Data Type'
        ]]
        df_merge = pd.merge(columns_pivot, col_data_types, on='Variable Name')

        station_data['dataset'] = {}

        for index_label, field_series in df_merge.iterrows():
            field_name = field_series['Variable Name']
            station_data['dataset'][field_name] = {}
            station_data['dataset'][field_name]['long_name'] = field_series[
                'long_name']
            station_data['dataset'][field_name]['data_type'] = field_series[
                'Data Type']
            station_data['dataset'][field_name]['units'] = field_series[
                'units']

        station_data['identification']['keywords']['default'][
            'keywords'] = metadata[
                (metadata['Variable Name'] == 'NC_GLOBAL')
                & (metadata['Attribute Name'] == 'keywords')]['Value'].values

        return_value = station_data

    return return_value
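The function above expects a nested `config` dict; a sketch of its shape, inferred from the keys read in the code (the template path and server URL are placeholders):

config = {
    'static_data': {'mcf_template': 'mcf_template.yml'},    # placeholder path
    'dynamic_data': {
        'erddap_server': 'https://example.org/erddap',      # placeholder server
        'erddap_protocol': 'tabledap',
    },
}
stations = load_data_from_erddap(config)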
Example #19
def GOFS_RTOFS_vs_Argo_floats(lon_forec_track, lat_forec_track, lon_forec_cone,
                              lat_forec_cone, lon_best_track, lat_best_track,
                              lon_lim, lat_lim, folder_fig):
    #%% User input

    #GOFS3.1 output model location
    url_GOFS_ts = 'http://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/ts3z'

    # RTOFS files
    folder_RTOFS = '/home/coolgroup/RTOFS/forecasts/domains/hurricanes/RTOFS_6hourly_North_Atlantic/'

    nc_files_RTOFS = ['rtofs_glo_3dz_f006_6hrly_hvr_US_east.nc',\
                      'rtofs_glo_3dz_f012_6hrly_hvr_US_east.nc',\
                      'rtofs_glo_3dz_f018_6hrly_hvr_US_east.nc',\
                      'rtofs_glo_3dz_f024_6hrly_hvr_US_east.nc']

    # COPERNICUS MARINE ENVIRONMENT MONITORING SERVICE (CMEMS)
    url_cmems = 'http://nrt.cmems-du.eu/motu-web/Motu'
    service_id = 'GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
    product_id = 'global-analysis-forecast-phy-001-024'
    depth_min = '0.493'
    out_dir = '/home/aristizabal/crontab_jobs'

    # Bathymetry file
    #bath_file = '/Users/aristizabal/Desktop/MARACOOS_project/Maria_scripts/nc_files/GEBCO_2014_2D_-100.0_0.0_-60.0_45.0.nc'
    bath_file = '/home/aristizabal/bathymetry_files/GEBCO_2014_2D_-100.0_0.0_-10.0_50.0.nc'

    # Argo floats
    url_Argo = 'http://www.ifremer.fr/erddap'

    #%%

    from matplotlib import pyplot as plt
    import numpy as np
    import xarray as xr
    import netCDF4
    from datetime import datetime, timedelta
    import cmocean
    import matplotlib.dates as mdates
    from erddapy import ERDDAP
    import pandas as pd
    import os

    # Do not produce figures on screen
    plt.switch_backend('agg')

    # Increase fontsize of labels globally
    plt.rc('xtick', labelsize=14)
    plt.rc('ytick', labelsize=14)
    plt.rc('legend', fontsize=14)

    #%% Reading bathymetry data

    ncbath = xr.open_dataset(bath_file)
    bath_lat = ncbath.variables['lat'][:]
    bath_lon = ncbath.variables['lon'][:]
    bath_elev = ncbath.variables['elevation'][:]

    oklatbath = np.logical_and(bath_lat >= lat_lim[0], bath_lat <= lat_lim[-1])
    oklonbath = np.logical_and(bath_lon >= lon_lim[0], bath_lon <= lon_lim[-1])

    bath_latsub = bath_lat[oklatbath]
    bath_lonsub = bath_lon[oklonbath]
    bath_elevs = bath_elev[oklatbath, :]
    bath_elevsub = bath_elevs[:, oklonbath]

    #%% Get time bounds for current day
    #ti = datetime.today()
    ti = datetime.today() - timedelta(1) - timedelta(hours=6)
    tini = datetime(ti.year, ti.month, ti.day)
    te = ti + timedelta(2)
    tend = datetime(te.year, te.month, te.day)

    #%% Look for Argo datasets

    e = ERDDAP(server=url_Argo)

    # Grab every dataset available
    #datasets = pd.read_csv(e.get_search_url(response='csv', search_for='all'))

    kw = {
        'min_lon': lon_lim[0],
        'max_lon': lon_lim[1],
        'min_lat': lat_lim[0],
        'max_lat': lat_lim[1],
        'min_time': str(tini),
        'max_time': str(tend),
    }

    search_url = e.get_search_url(response='csv', **kw)

    # Grab the results
    search = pd.read_csv(search_url)

    # Extract the IDs
    dataset = search['Dataset ID'].values

    msg = 'Found {} Datasets:\n\n{}'.format
    print(msg(len(dataset), '\n'.join(dataset)))

    dataset_type = dataset[0]

    constraints = {
        'time>=': str(tini),
        'time<=': str(tend),
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }

    variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    e = ERDDAP(server=url_Argo, protocol='tabledap', response='nc')

    e.dataset_id = dataset_type
    e.constraints = constraints
    e.variables = variables

    print(e.get_download_url())

    df = e.to_pandas(
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

    argo_ids = np.asarray(df['platform_number'])
    argo_times = np.asarray(df['time (UTC)'])
    argo_press = np.asarray(df['pres (decibar)'])
    argo_lons = np.asarray(df['longitude (degrees_east)'])
    argo_lats = np.asarray(df['latitude (degrees_north)'])
    argo_temps = np.asarray(df['temp (degree_Celsius)'])
    argo_salts = np.asarray(df['psal (PSU)'])

    #%% GOFS 3.1

    try:
        GOFS_ts = xr.open_dataset(url_GOFS_ts, decode_times=False)

        lt_GOFS = np.asarray(GOFS_ts['lat'][:])
        ln_GOFS = np.asarray(GOFS_ts['lon'][:])
        tt = GOFS_ts['time']
        t_GOFS = netCDF4.num2date(tt[:], tt.units)
        depth_GOFS = np.asarray(GOFS_ts['depth'][:])
    except Exception as err:
        print(err)
        GOFS_ts = np.nan
        lt_GOFS = np.nan
        ln_GOFS = np.nan
        depth_GOFS = np.nan
        t_GOFS = ti

    #%% Map Argo floats

    lev = np.arange(-9000, 9100, 100)
    plt.figure()
    plt.contourf(bath_lonsub,
                 bath_latsub,
                 bath_elevsub,
                 lev,
                 cmap=cmocean.cm.topo)
    plt.plot(lon_forec_track, lat_forec_track, '.-', color='gold')
    plt.plot(lon_forec_cone, lat_forec_cone, '.-b', markersize=1)
    plt.plot(lon_best_track, lat_best_track, 'or', markersize=3)

    argo_idd = np.unique(argo_ids)
    for i, id in enumerate(argo_idd):
        okind = np.where(argo_ids == id)[0]
        plt.plot(np.unique(argo_lons[okind]),
                 np.unique(argo_lats[okind]),
                 's',
                 color='darkorange',
                 markersize=5,
                 markeredgecolor='k')

    plt.title('Argo Floats ' + str(tini)[0:13] + '-' + str(tend)[0:13],
              fontsize=16)
    plt.axis('scaled')
    plt.xlim(lon_lim[0], lon_lim[1])
    plt.ylim(lat_lim[0], lat_lim[1])

    file = folder_fig + 'ARGO_lat_lon'
    #file = folder_fig + 'ARGO_lat_lon_' + str(np.unique(argo_times)[0])[0:10]
    plt.savefig(file, bbox_inches='tight', pad_inches=0.1)

    #%% Figure argo float vs GOFS and vs RTOFS

    argo_idd = np.unique(argo_ids)

    for i, id in enumerate(argo_idd):
        print(id)
        okind = np.where(argo_ids == id)[0]
        argo_time = np.asarray([
            datetime.strptime(t, '%Y-%m-%dT%H:%M:%SZ')
            for t in argo_times[okind]
        ])

        argo_lon = argo_lons[okind]
        argo_lat = argo_lats[okind]
        argo_pres = argo_press[okind]
        argo_temp = argo_temps[okind]
        argo_salt = argo_salts[okind]

        # GOFS
        print('Retrieving variables from GOFS')
        if isinstance(GOFS_ts, float):
            temp_GOFS = np.nan
            salt_GOFS = np.nan
        else:
            #oktt_GOFS = np.where(t_GOFS >= argo_time[0])[0][0]
            ttGOFS = np.asarray([
                datetime(t_GOFS[i].year, t_GOFS[i].month, t_GOFS[i].day,
                         t_GOFS[i].hour) for i in np.arange(len(t_GOFS))
            ])
            tstamp_GOFS = [
                mdates.date2num(ttGOFS[i]) for i in np.arange(len(ttGOFS))
            ]
            oktt_GOFS = np.unique(
                np.round(
                    np.interp(mdates.date2num(argo_time[0]), tstamp_GOFS,
                              np.arange(len(tstamp_GOFS)))).astype(int))[0]
            oklat_GOFS = np.where(lt_GOFS >= argo_lat[0])[0][0]
            oklon_GOFS = np.where(ln_GOFS >= argo_lon[0] + 360)[0][0]
            temp_GOFS = np.asarray(GOFS_ts['water_temp'][oktt_GOFS, :,
                                                         oklat_GOFS,
                                                         oklon_GOFS])
            salt_GOFS = np.asarray(GOFS_ts['salinity'][oktt_GOFS, :,
                                                       oklat_GOFS, oklon_GOFS])

        # RTOFS
        #Time window
        year = int(argo_time[0].year)
        month = int(argo_time[0].month)
        day = int(argo_time[0].day)
        tini = datetime(year, month, day)
        tend = tini + timedelta(days=1)

        # Read RTOFS grid and time
        print('Retrieving coordinates from RTOFS')

        fol = 'rtofs.' + tini.strftime('%Y%m%d')

        ncRTOFS = xr.open_dataset(folder_RTOFS + fol + '/' + nc_files_RTOFS[0])
        latRTOFS = np.asarray(ncRTOFS.Latitude[:])
        lonRTOFS = np.asarray(ncRTOFS.Longitude[:])
        depth_RTOFS = np.asarray(ncRTOFS.Depth[:])

        tRTOFS = []
        for t in np.arange(len(nc_files_RTOFS)):
            ncRTOFS = xr.open_dataset(folder_RTOFS + fol + '/' +
                                      nc_files_RTOFS[t])
            tRTOFS.append(np.asarray(ncRTOFS.MT[:])[0])

        tRTOFS = np.asarray([mdates.num2date(mdates.date2num(tRTOFS[t])) \
                  for t in np.arange(len(nc_files_RTOFS))])

        oktt_RTOFS = np.where(
            mdates.date2num(tRTOFS) >= mdates.date2num(argo_time[0]))[0][0]
        oklat_RTOFS = np.where(latRTOFS[:, 0] >= argo_lat[0])[0][0]
        oklon_RTOFS = np.where(lonRTOFS[0, :] >= argo_lon[0])[0][0]

        nc_file = folder_RTOFS + fol + '/' + nc_files_RTOFS[oktt_RTOFS]
        ncRTOFS = xr.open_dataset(nc_file)
        #time_RTOFS = tRTOFS[oktt_RTOFS]
        temp_RTOFS = np.asarray(ncRTOFS.variables['temperature'][0, :,
                                                                 oklat_RTOFS,
                                                                 oklon_RTOFS])
        salt_RTOFS = np.asarray(ncRTOFS.variables['salinity'][0, :,
                                                              oklat_RTOFS,
                                                              oklon_RTOFS])
        #lon_RTOFS = lonRTOFS[0,oklon_RTOFS]
        #lat_RTOFS = latRTOFS[oklat_RTOFS,0]

        # Downloading and reading Copernicus output
        motuc = 'python -m motuclient --motu ' + url_cmems + \
        ' --service-id ' + service_id + \
        ' --product-id ' + product_id + \
        ' --longitude-min ' + str(argo_lon[0]-2/12) + \
        ' --longitude-max ' + str(argo_lon[0]+2/12) + \
        ' --latitude-min ' + str(argo_lat[0]-2/12) + \
        ' --latitude-max ' + str(argo_lat[0]+2/12) + \
        ' --date-min ' + '"' + str(tini-timedelta(0.5)) + '"' + \
        ' --date-max ' + '"' + str(tend+timedelta(0.5)) + '"' + \
        ' --depth-min ' + depth_min + \
        ' --depth-max ' + str(np.nanmax(argo_pres)+1000) + \
        ' --variable ' + 'thetao' + ' ' + \
        ' --variable ' + 'so'  + ' ' + \
        ' --out-dir ' + out_dir + \
        ' --out-name ' + str(id) + '.nc' + ' ' + \
        ' --user ' + 'maristizabalvar' + ' ' + \
        ' --pwd ' +  'MariaCMEMS2018'

        os.system(motuc)
        # Check if file was downloaded

        COP_file = out_dir + '/' + str(id) + '.nc'
        # Check if file was downloaded
        resp = os.system('ls ' + out_dir + '/' + str(id) + '.nc')
        if resp == 0:
            COP = xr.open_dataset(COP_file)

            latCOP = np.asarray(COP.latitude[:])
            lonCOP = np.asarray(COP.longitude[:])
            depth_COP = np.asarray(COP.depth[:])
            tCOP = np.asarray(mdates.num2date(mdates.date2num(COP.time[:])))
        else:
            latCOP = np.empty(1)
            latCOP[:] = np.nan
            lonCOP = np.empty(1)
            lonCOP[:] = np.nan
            tCOP = np.empty(1)
            tCOP[:] = np.nan

        oktimeCOP = np.where(
            mdates.date2num(tCOP) >= mdates.date2num(tini))[0][0]
        oklonCOP = np.where(lonCOP >= argo_lon[0])[0][0]
        oklatCOP = np.where(latCOP >= argo_lat[0])[0][0]

        temp_COP = np.asarray(COP.variables['thetao'][oktimeCOP, :, oklatCOP,
                                                      oklonCOP])
        salt_COP = np.asarray(COP.variables['so'][oktimeCOP, :, oklatCOP,
                                                  oklonCOP])

        # Figure temp
        plt.figure(figsize=(5, 6))
        plt.plot(argo_temp,
                 -argo_pres,
                 '.-',
                 linewidth=2,
                 label='ARGO Float id ' + str(id))
        plt.plot(temp_GOFS,
                 -depth_GOFS,
                 '.-',
                 linewidth=2,
                 label='GOFS 3.1',
                 color='red')
        plt.plot(temp_RTOFS,
                 -depth_RTOFS,
                 '.-',
                 linewidth=2,
                 label='RTOFS',
                 color='g')
        plt.plot(temp_COP,
                 -depth_COP,
                 '.-',
                 linewidth=2,
                 label='Copernicus',
                 color='darkorchid')
        plt.ylim([-1000, 0])
        plt.title('Temperature Profile on '+ str(argo_time[0])[0:13] +
                  '\n [lon,lat] = [' \
                  + str(np.round(argo_lon[0],3)) +',' +\
                      str(np.round(argo_lat[0],3))+']',\
                      fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        plt.xlabel('$^oC$', fontsize=14)
        plt.legend(loc='lower right', fontsize=14)

        file = folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_temp_' + str(id)
        plt.savefig(file, bbox_inches='tight', pad_inches=0.1)

        # Figure salt
        plt.figure(figsize=(5, 6))
        plt.plot(argo_salt,
                 -argo_pres,
                 '.-',
                 linewidth=2,
                 label='ARGO Float id ' + str(id))
        plt.plot(salt_GOFS,
                 -depth_GOFS,
                 '.-',
                 linewidth=2,
                 label='GOFS 3.1',
                 color='red')
        plt.plot(salt_RTOFS,
                 -depth_RTOFS,
                 '.-',
                 linewidth=2,
                 label='RTOFS',
                 color='g')
        plt.plot(salt_COP,
                 -depth_COP,
                 '.-',
                 linewidth=2,
                 label='Copernicus',
                 color='darkorchid')
        plt.ylim([-1000, 0])
        plt.title('Salinity Profile on '+ str(argo_time[0])[0:13] +
                  '\n [lon,lat] = [' \
                  + str(np.round(argo_lon[0],3)) +',' +\
                      str(np.round(argo_lat[0],3))+']',\
                      fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        plt.legend(loc='lower right', fontsize=14)

        file = folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_salt_' + str(id)
        plt.savefig(file, bbox_inches='tight', pad_inches=0.1)
Example #20
def get_coordinates(df, **kw):
    '''
    Example ERDDAP TableDAP URL:

    dataset_url = '%s/tabledap/%s.csvp?latitude,longitude,time&longitude>=-72.0&longitude<=-69&latitude>=38&latitude<=41&time>=1278720000.0&time<=1470787200.0&distinct()' % (all_datasets['server'].iloc[int(i)],all_datasets['Dataset ID'].iloc[int(i)])
    '''
    df_coords = pd.DataFrame()

    # alternate approach to above is iterate the original DataFrame passed (df), stopping either
    #   at final_dataset_limit (10 currently) or the max # of rows in df (conclusion of for loop)
    #   previous enclosing while loop is unnecessary as a result
    final_dataset_limit = 10
    datasets_found = 0
    if df.shape[0] < final_dataset_limit:
        final_dataset_limit = df.shape[0]

    index_random = random.sample(range(0, df.shape[0]), df.shape[0])
    print("index_random: {}".format(index_random))

    #for i in range(subset_datasets.shape[0]):
    for i in index_random:
        server_url = df['server'].iloc[int(i)]
        dataset_id = df['Dataset ID'].iloc[int(i)]
        institution = df['Institution'].iloc[int(i)]

        # skip some difficult datasets for now:
        if "ROMS" in dataset_id or "DOP" in dataset_id:  # skip ROMS model output
            #print("Skipping %s" % server_url + dataset_id)
            continue

        e = ERDDAP(server=server_url, protocol='tabledap', response='csv')
        try:
            print("datasets_found: {}".format(datasets_found))
            # former config for query, replaced with new code below:
            #e.variables=["latitude","longitude"]#,"time"]
            #e.dataset_id = all_datasets['Dataset ID'].iloc[int(i)]
            #e.constraints = {
            #       "time>=": kw['min_time'],
            #       "time<=": kw['max_time'],
            #       "longitude>=": kw['min_lon'],
            #       "longitude<=": kw['max_lon'],
            #       "latitude>=": kw['min_lat'],
            #       "latitude<=": kw['max_lat'],
            #       "distinct" : ()
            #}

            # Generate a download URL via e.get_download_url and pass to Pandas DataFrame via read_csv
            #   we need to use e.constraints here rather than in e.get_download_url to allow appending '>=' '<=' to the constraints keys to match ERDDAP's API
            #   (parameter signature differs from the search API used above)
            # also add a 'distinct = ()' param, generate a download url, and submit a csv dataset download request to ERDDAP
            #kw["distinct"] = "()"
            e.constraints = {
                "time>=": kw['min_time'],
                "time<=": kw['max_time'],
                "longitude>=": kw['min_lon'],
                "longitude<=": kw['max_lon'],
                "latitude>=": kw['min_lat'],
                "latitude<=": kw['max_lat'],
                "distinct": ()
            }
            url = e.get_download_url(
                #constraints=kw,
                response="csvp",
                dataset_id=df['Dataset ID'].iloc[int(i)],
                variables=["latitude", "longitude"])
            print("Download URL: {}".format(url))

            #coords = pd.read_csv(url, headers=headers)
            coords = pd.read_csv(url)
            coords['dataset_count'] = i
            coords['dataset_download_url'] = url
            coords['Dataset ID'] = dataset_id
            coords['Institution'] = institution

            metadata_url = e.get_info_url(
                dataset_id=df['Dataset ID'].iloc[int(i)], response='csv')

            metadata = pd.read_csv(metadata_url)

            coords['cdm_data_type'] = "".join(
                metadata.loc[metadata["Attribute Name"] == "cdm_data_type",
                             "Value"])

            #get_var_by_attr example (ToDo):
            #e.get_var_by_attr(dataset_id, standard_name='northward_sea_water_velocity')

            print(coords.head())
            df_coords = pd.concat([df_coords, coords])

            # reaching this point in the query means the dataset query was successful, increment
            #   we need to break out of for loop here however if we reach final_dataset_limit to not go over:
            datasets_found += 1
            print("new dataset acquired; datasets_found: {}".format(
                datasets_found))
            if datasets_found == final_dataset_limit: break

        except Exception as ex:
            # can happen if the dataset does not have any features within the query window, just log it here:
            if type(ex).__name__ in ["HTTPError"]:
                print(ex)
            #raise
            pass

    return df_coords
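A usage sketch; `df` must carry 'server', 'Dataset ID' and 'Institution' columns (e.g. the `all_datasets` search results mentioned in the docstring), and the keyword bounds are illustrative:

kw = {
    'min_time': '2016-07-01T00:00:00Z', 'max_time': '2016-08-01T00:00:00Z',
    'min_lon': -72.0, 'max_lon': -69.0,
    'min_lat': 38.0, 'max_lat': 41.0,
}
df_coords = get_coordinates(all_datasets, **kw)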
Example #21
    check = os.path.exists(path)
    if not check:
        os.makedirs(path)
    else:
        print(f'"{path}" already exists' )


# Need to put everything together into a dataframe
data_availability = pd.DataFrame(columns=erd.variables)
for i in range(len(days)-1):
    
    # Get the time constraints
    min_time = days[i]
    max_time = days[i+1]
    erd.constraints = {
        'time>=': min_time,
        'time<=': max_time
    }
    
    # Query the relevant data and put into a dataframe
    data_subset = get_erddap_data(0, erd)
        
    # Rename the columns to be consistent with variable names
    for col in data_subset.columns:
        data_subset.rename(columns={col: col.split()[0]}, inplace=True)
            
    # Get the fill_values
    fill_values = get_fill_values(erd)
        
    # Calculate the % available data for each variable in the day time period
    available = {}
    for var in erd.variables:
Example #22
for i in range(len(wmo_mb)):

    print(wmo_mb[i])

    e = ERDDAP(
      server='http://osmc.noaa.gov/erddap',
      protocol='tabledap',
    )
    
    e.response = 'csv'
    e.dataset_id = 'OSMC_30day'
    e.constraints = {
        'time>=': str(x.tm_year)+"-"+str(x.tm_mon).zfill(2)+"-"+str(x.tm_mday).zfill(2)+"T"+str(x.tm_hour).zfill(2)+":00:00Z",
        'longitude>=': -80.0,
        'longitude<=': 80.0,
        'platform_type=': "DRIFTING BUOYS (GENERIC)",
        'platform_code=': str(wmo_mb[i]),
    }        
    e.variables = [
        'platform_code',
        'time',
        'latitude',
        'longitude',
        'sst',
        'slp',
    ]
    try:
        df = e.to_pandas()
    except Exception:
        print("No data for WMO " + str(wmo_mb[i]))
Example #23
In this notebook we will explore the statistics of the messages IOOS is releasing to GTS.

The first step is to download the data. We will use an ERDDAP server that [hosts the CSV files](https://ferret.pmel.noaa.gov/generic/erddap/files/ioos_obs_counts/) with the ingest data.

from datetime import date

from erddapy import ERDDAP

server = "http://osmc.noaa.gov/erddap"
e = ERDDAP(server=server, protocol="tabledap")

e.dataset_id = "ioos_obs_counts"
e.variables = ["time", "locationID", "region", "sponsor", "met", "wave"]
e.constraints = {
    "time>=": "2019-09",
    "time<": "2020-11",
}

df = e.to_pandas(parse_dates=True)

df["locationID"] = df["locationID"].str.lower()

df.tail()

The table has all the ingest data from 2019-09 to 2020-11 (the window requested in the constraints above). We can now explore it by grouping the data by IOOS Regional Association (RA).

groups = df.groupby("region")

ax = groups.sum().plot(kind="bar", figsize=(11, 3.75))
ax.yaxis.get_major_formatter().set_scientific(False)
ax.set_ylabel("# observations");