Example #1
def load_glider(dataset_id='ru32-20190102T1317-profile-sci-rt',
                server="http://slocum-data.marine.rutgers.edu/erddap"):
    ''' Load glider data from ERDDAP.
        Inputs: dataset ID and server.
        Returns an xarray dataset indexed on time. '''

    # should change: write to_netcdf, then check if netcdf exists

    e = ERDDAP(
        server=server,
        protocol="tabledap",
        response="nc",
    )

    e.dataset_id = dataset_id

    gds = e.to_xarray()

    # want the dimension to be time, not obs number
    gds = gds.swap_dims({"obs": "time"})
    gds = gds.sortby("time")

    # drop repeated time values
    gds = gds.sel(time=~gds.indexes['time'].duplicated())

    # get the seafloor depths too

    e2 = ERDDAP(
        server="http://slocum-data.marine.rutgers.edu/erddap",
        protocol="tabledap",
        response="nc",
    )

    # get some of the raw data:
    #     e2.dataset_id = dataset_id[:-14] + 'trajectory-raw-rt'
    e2.dataset_id = dataset_id.replace('profile-sci', 'trajectory-raw')

    e2.variables = ['time', 'm_water_depth', 'm_pitch']

    # this connects to the data and loads it into an xarray dataset

    gds_raw = e2.to_xarray().drop_dims('trajectory')

    # want the dimension to be time, not obs number

    gds_raw = gds_raw.swap_dims({"obs": "time"})
    gds_raw = gds_raw.sortby("time")

    gds_raw = gds_raw.sel(time=~gds_raw.indexes['time'].duplicated())

    # remove bad values:
    gds_raw['m_water_depth'] = gds_raw.m_water_depth.where(
        gds_raw.m_water_depth > 10, drop=True)

    gds['bottom_depth'] = gds_raw.m_water_depth.interp_like(gds,
                                                            method='nearest')

    return gds
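# A minimal usage sketch (assuming the default Rutgers dataset above is
# still being served; deployment IDs rotate, so this is illustrative):
gds = load_glider()
print(gds)                  # xarray Dataset indexed on time
print(gds['bottom_depth'])  # seafloor depth interpolated onto the science time base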
Example #2
    def load_data(self, year='2019'):
        self.dfs = {}
        for index, row in self.df.iterrows():
            if (self.glider_id
                    in row['Dataset ID']) and (year in row['Dataset ID']):
                print(row['Dataset ID'])

                try:
                    e = ERDDAP(
                        server=self.server_url,
                        protocol='tabledap',
                        response='csv',
                    )
                    e.dataset_id = row['Dataset ID']
                    e.constraints = self.constraints
                    e.variables = self.variables[row['Dataset ID']]
                except HTTPError:
                    print('Failed to generate url {}'.format(
                        row['Dataset ID']))
                    continue
                self.dfs.update({
                    row['Dataset ID']:
                    e.to_pandas(
                        index_col='time (UTC)',
                        parse_dates=True,
                        skiprows=(1, )  # units information can be dropped.
                    )
                })

        return self.dfs
Example #3
def active_drifters(bbox=None, time_start=None, time_end=None):
    bbox = bbox or [-100, -40, 18, 60]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')

    e = ERDDAP(server='OSMC', protocol="tabledap")
    e.dataset_id = "gdp_interpolated_drifter"

    # Setting constraints
    e.constraints = {
        "time>=": t0,
        "time<=": t1,
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    # e.variables = [
    #     "WMO",
    #     "latitude",
    #     "longitude",
    #     "time",
    # ]

    try:
        df = e.to_pandas()
    except ValueError:
        return pd.DataFrame()

    return df
Example #4
def retrieve_variable_names_erddap_server(url_erddap, dataset_id):
    """
    Created on Tue Nov  3 11:26:05 2020

    @author: aristizabal

    This function retrieves the variable names from the IOOS
    and Rutgers ERDDAP glider servers.

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'

    Outputs:
    variables: list of variables for the requested dataset_id

    """

    from erddapy import ERDDAP

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id

    df = e.to_pandas()

    variable_names = list(df.columns)
    print('List of available variables')
    print(variable_names)

    return variable_names
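# Usage sketch, assuming the IOOS glider server and dataset ID from the
# docstring are still reachable:
variables = retrieve_variable_names_erddap_server(
    'https://data.ioos.us/gliders/erddap', 'ng231-20190901T0000')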
Example #5
def get_erddap_dataset(server, protocol, file_type, ds_id, var_list=None):
    e = ERDDAP(server=server, protocol=protocol, response=file_type)
    e.dataset_id = ds_id
    if var_list:
        e.variables = var_list
    ds = e.to_xarray()
    ds = ds.sortby(ds.time)
    return ds
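# Usage sketch; the server URL, dataset ID, and variables below are
# illustrative placeholders, not values from the original:
ds = get_erddap_dataset(
    server='https://data.ioos.us/gliders/erddap',
    protocol='tabledap',
    file_type='nc',
    ds_id='ng231-20190901T0000',
    var_list=['time', 'temperature', 'salinity'],
)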
Example #6
def active_argo_floats(bbox=None, time_start=None, time_end=None, floats=None):
    """

    :param bbox: [westernmost longitude, easternmost longitude,
        southernmost latitude, northernmost latitude]
    :param time_start: time to start looking for floats
    :param time_end: time to end looking for floats
    :param floats: optional platform_number value(s) to filter on
    :return: pandas DataFrame of Argo profiles
    """

    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    floats = floats or False

    constraints = {
        'time>=': str(time_start),
        'time<=': str(time_end),
    }

    if bbox:
        constraints['longitude>='] = bbox[0]
        constraints['longitude<='] = bbox[1]
        constraints['latitude>='] = bbox[2]
        constraints['latitude<='] = bbox[3]

    if floats:
        constraints['platform_number='] = floats

    variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    e = ERDDAP(
        server='IFREMER',
        protocol='tabledap',
        response='nc'
    )

    e.dataset_id = 'ArgoFloats'
    e.constraints = constraints
    e.variables = variables

    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
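# Usage sketch: the defaults give the last 24 hours over the default box;
# the narrower bbox below is a hypothetical Gulf of Mexico query:
df = active_argo_floats()
df_gulf = active_argo_floats(bbox=[-98, -80, 18, 31])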
Example #7
def get_erddap_dataset(server, ds_id, variables=None, constraints=None):

    e = ERDDAP(server=server,
               protocol='tabledap',
               response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables
    ds = e.to_xarray()
    ds = ds.sortby(ds.time)
    return ds
Example #8
def test_erddap_requests_kwargs():
    """ Test that an ERDDAP instance can have requests_kwargs attribute assigned
    and are passed to the underlying methods """

    base_url = "http://www.neracoos.org/erddap"
    timeout_seconds = 1  # request timeout in seconds
    slowwly_milliseconds = (timeout_seconds + 1) * 1000
    slowwly_url = ("http://slowwly.robertomurray.co.uk/delay/" +
                   str(slowwly_milliseconds) + "/url/" + base_url)

    connection = ERDDAP(slowwly_url)
    connection.dataset_id = "M01_sbe37_all"
    connection.protocol = "tabledap"

    connection.requests_kwargs["timeout"] = timeout_seconds

    with pytest.raises(ReadTimeout):
        connection.to_xarray()
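# The same requests_kwargs mechanism is useful outside tests; a sketch,
# assuming the NERACOOS server above is reachable, of setting a generous
# timeout that is forwarded to the underlying HTTP request:
e = ERDDAP(server="http://www.neracoos.org/erddap", protocol="tabledap")
e.dataset_id = "M01_sbe37_all"
e.requests_kwargs["timeout"] = 60  # seconds
ds = e.to_xarray()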
Example #9
def check_dataset_empty(url_erddap, dataset_id, date_ini, date_end, lon_lim, lat_lim):

    from erddapy import ERDDAP

    constraints = {
        'time>=': date_ini,
        'time<=': date_end,
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
        }

    variable_names = [
            'depth',
            'latitude',
            'longitude',
            'time',
            'temperature',
            'salinity'
            ]

    e = ERDDAP(
            server=url_erddap,
            protocol='tabledap',
            response='nc'
            )

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variable_names

    # Convert the glider data to a data frame
    # and check that it has data
    df = e.to_pandas()
    empty_dataset = len(df) < 4

    return empty_dataset
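# Usage sketch with illustrative limits (ISO date strings and [min, max]
# lists, matching the constraint keys above):
is_empty = check_dataset_empty(
    'https://data.ioos.us/gliders/erddap', 'ng231-20190901T0000',
    '2019-09-01T00:00:00Z', '2019-09-10T00:00:00Z',
    [-75.0, -72.0], [38.0, 40.0])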
Example #10
def get_erddap_dataset(ds_id, variables=None, constraints=None, filetype=None):
    """
    Returns an xarray dataset for a specified dataset ID (or a pandas
    dataframe if the dataset cannot be converted to xarray)
    :param ds_id: dataset ID e.g. ng314-20200806T2040
    :param variables: optional list of variables
    :param constraints: optional dict of constraints
    :param filetype: optional filetype to return, 'nc' (default) or 'dataframe'
    :return: xarray dataset, or pandas dataframe
    """
    filetype = filetype or 'nc'

    e = ERDDAP(server='NGDAC', protocol='tabledap', response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables
    if filetype == 'nc':
        try:
            ds = e.to_xarray()
            ds = ds.sortby(ds.time)
        except OSError:
            print('No dataset available for specified constraints: {}'.format(
                ds_id))
            ds = []
        except TypeError:
            print('Cannot convert to xarray, providing dataframe: {}'.format(
                ds_id))
            ds = e.to_pandas().dropna()
    elif filetype == 'dataframe':
        ds = e.to_pandas().dropna()
    else:
        print('Unrecognized filetype: {}. Needs to be "nc" or "dataframe"'.format(filetype))
        ds = None  # nothing valid to return for an unrecognized filetype

    return ds
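# Usage sketch against the NGDAC server hard-coded above; the dataset ID
# comes from the docstring, the constraint is illustrative:
ds = get_erddap_dataset(
    'ng314-20200806T2040',
    variables=['time', 'temperature', 'salinity'],
    constraints={'time>=': '2020-08-07T00:00:00Z'},
    filetype='nc',
)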
Example #11
    def load_data(self, year='2019'):
        self.dfs = {}
        for index, row in self.df.iterrows():
            if (self.glider_id
                    in row['Dataset ID']) and (year in row['Dataset ID']):
                print(row['Dataset ID'])

                try:
                    e = ERDDAP(
                        server=self.server_url,
                        protocol='tabledap',
                        response='csv',
                    )
                    e.dataset_id = row['Dataset ID']
                    e.constraints = self.constraints
                    e.variables = self.variables[row['Dataset ID']]
                except HTTPError:
                    print('Failed to generate url {}'.format(
                        row['Dataset ID']))
                    continue
                self.dfs.update({
                    row['Dataset ID']:
                    e.to_pandas(
                        index_col='time (UTC)',
                        parse_dates=True,
                        skiprows=(1, )  # units information can be dropped.
                    )
                })

        return self.dfs
Example #12
def get_erddap_data(dataset_id):
    '''
    :param dataset_id: the deployment name, example: 'ce_311-20200708T1723'
    :return: pandas DataFrame with deployment variable values
    '''
    e = ERDDAP(
        server='https://gliders.ioos.us/erddap',
        protocol='tabledap',
    )
    e.response = 'csv'
    e.dataset_id = dataset_id
    e.variables = [
        'depth',
        'latitude',
        'longitude',
        'salinity',
        'temperature',
        'conductivity',
        'density',
        'time',
    ]

    df = e.to_pandas()
    return df
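# Usage sketch with the deployment name from the docstring:
df = get_erddap_data('ce_311-20200708T1723')
print(df.columns)  # depth, latitude, longitude, salinity, temperature, ...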
Example #13
def read_glider_variables_erddap_server(url_erddap, dataset_id,
                                        lat_lim, lon_lim,
                                        variable_names=['time'],
                                        **kwargs):
    """
    Created on Tue Nov  3 11:26:05 2020

    @author: aristizabal

    This function reads glider variables from the IOOS
    and Rutgers ERDDAP glider servers.

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'
    variable_names: list of variable names.
                    Example:
                            variable_names = ['depth',
                                            'latitude',
                                            'longitude',
                                            'time',
                                            'temperature',
                                            'salinity']
                    The default value is variable_names=['time']
    lat_lim: latitude limits for the search.
            Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
            Example, lon_lim = [-75.0,-72.0]
    date_ini: initial date of the time window.
        This function accepts the date formats '%Y-%m-%dT%H:%M:%SZ' and '%Y/%m/%d/%H'.
        Example: date_ini = '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end: final date of the time window.
        This function accepts the same date formats.
        Example: date_end = '2018-08-10T00:00:00Z' or '2018/08/10/00'

    Outputs:
    df: Pandas data frame with all the variables requested as vectors

    """

    from erddapy import ERDDAP

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    if date_ini is None or date_end is None:
        constraints = {
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }
    else:
        constraints = {
            'time>=': date_ini,
            'time<=': date_end,
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variable_names

    # Convert the glider data to a data frame,
    # checking that the data frame has data
    df = e.to_pandas()
    if len(df) > 3:
        df = e.to_pandas(parse_dates=True)

    return df
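# Usage sketch pulling a few variables inside a box and time window
# (server, dataset ID, and limits are the docstring's examples; the dates
# are illustrative):
df = read_glider_variables_erddap_server(
    'https://data.ioos.us/gliders/erddap', 'ng231-20190901T0000',
    [38.0, 40.0], [-75.0, -72.0],
    variable_names=['time', 'depth', 'temperature', 'salinity'],
    date_ini='2019-09-02T00:00:00Z', date_end='2019-09-10T00:00:00Z')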
Example #14
def read_glider_data_erddap_server(url_erddap, dataset_id,
                                   lat_lim, lon_lim, scatter_plot, **kwargs):
    """
    Created on Tue Feb  5 10:05:37 2019

    @author: aristizabal

    This function reads glider data from the IOOS
    Data Assembly Center (DAC).

    Inputs:
    url_erddap: url address of the erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: this id is retrieved from the glider DAC using the
               function "retrieve_glider_id_erddap_server".
               Example: 'ru30-20180705T1825'
    lat_lim: latitude limits for the search.
            Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
            Example, lon_lim = [-75.0,-72.0]
    date_ini: initial date of the time window.
        This function accepts the date formats '%Y-%m-%dT%H:%M:%SZ' and '%Y/%m/%d/%H'.
        Example: date_ini = '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end: final date of the time window.
        This function accepts the same date formats.
        Example: date_end = '2018-08-10T00:00:00Z' or '2018/08/10/00'
    scatter_plot: if equal to 'yes' then a scatter plot
            of the glider transect is plotted

    Outputs:
    tempg: all the glider profiles of temperature within the user defined time window
    saltg: all the glider profiles of salinity within the user defined time window
    latg: latitude within the user defined time window
    long: longitude within the user defined time window
    timeg: time vector within the user defined time window
    depthg: depth vector for all profiles
    """

    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    if date_ini is None or date_end is None:
        constraints = {
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }
    else:
        constraints = {
            'time>=': date_ini,
            'time<=': date_end,
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Convert the glider data to a data frame,
    # checking that the data frame has data
    df = e.to_pandas()
    if len(df) > 3:

        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        ).dropna()

        # Converting glider vectors into arrays
        timeg, ind = np.unique(df.index.values, return_index=True)
        latg = df['latitude (degrees_north)'].values[ind]
        long = df['longitude (degrees_east)'].values[ind]

        dg = df['depth (m)'].values
        vg1 = df[df.columns[3]].values
        vg2 = df[df.columns[4]].values

        zn = int(np.max(np.diff(np.hstack([ind, len(dg)]))))  # plain int; np.int is removed in modern NumPy

        depthg = np.empty((zn, len(timeg)))
        depthg[:] = np.nan
        tempg = np.empty((zn, len(timeg)))
        tempg[:] = np.nan
        saltg = np.empty((zn, len(timeg)))
        saltg[:] = np.nan

        for i, ii in enumerate(ind):
            if i < len(timeg) - 1:
                depthg[0:len(dg[ind[i]:ind[i + 1]]), i] = dg[ind[i]:ind[i + 1]]
                tempg[0:len(vg1[ind[i]:ind[i + 1]]),
                      i] = vg1[ind[i]:ind[i + 1]]
                saltg[0:len(vg2[ind[i]:ind[i + 1]]),
                      i] = vg2[ind[i]:ind[i + 1]]
            else:
                depthg[0:len(dg[ind[i]:len(dg)]), i] = dg[ind[i]:len(dg)]
                tempg[0:len(vg1[ind[i]:len(vg1)]), i] = vg1[ind[i]:len(vg1)]
                saltg[0:len(vg2[ind[i]:len(vg2)]), i] = vg2[ind[i]:len(vg2)]

        # Scatter plot
        if scatter_plot == 'yes':

            color_map = cmocean.cm.thermal
            varg = tempg
            timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
            ttg = np.ravel(timeg_matrix)
            dg = np.ravel(depthg)
            teg = np.ravel(varg)

            kw = dict(c=teg, marker='*', edgecolor='none')

            fig, ax = plt.subplots(figsize=(10, 3))
            cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
            #fig.colorbar(cs)
            ax.set_xlim(timeg[0], timeg[-1])

            ax.set_ylabel('Depth (m)', fontsize=14)
            cbar = plt.colorbar(cs)
            cbar.ax.set_ylabel('Temperature ($^oC$)', fontsize=14)
            ax.set_title(dataset_id, fontsize=16)
            xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
            ax.xaxis.set_major_formatter(xfmt)
            plt.ylim([-np.nanmax(dg), 0])

            color_map = cmocean.cm.haline
            varg = saltg
            timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
            ttg = np.ravel(timeg_matrix)
            dg = np.ravel(depthg)
            teg = np.ravel(varg)

            kw = dict(c=teg, marker='*', edgecolor='none')

            fig, ax = plt.subplots(figsize=(10, 3))
            cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
            #fig.colorbar(cs)
            ax.set_xlim(timeg[0], timeg[-1])

            ax.set_ylabel('Depth (m)', fontsize=14)
            cbar = plt.colorbar(cs)
            cbar.ax.set_ylabel('Salinity', fontsize=14)
            ax.set_title(dataset_id, fontsize=16)
            xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
            ax.xaxis.set_major_formatter(xfmt)
            plt.ylim([-np.nanmax(dg), 0])

    else:
        tempg = np.nan
        saltg = np.nan
        timeg = np.nan
        latg = np.nan
        long = np.nan
        depthg = np.nan

    return tempg, saltg, timeg, latg, long, depthg
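# Usage sketch; the return is a tuple of 2-D profile arrays, and
# scatter_plot='yes' also draws the temperature and salinity transects:
tempg, saltg, timeg, latg, long, depthg = read_glider_data_erddap_server(
    'https://data.ioos.us/gliders/erddap', 'ru30-20180705T1825',
    [38.0, 40.0], [-75.0, -72.0], 'yes',
    date_ini='2018-08-02T00:00:00Z', date_end='2018-08-10T00:00:00Z')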
Example #15
def active_gliders(bbox=None, time_start=None, time_end=None, glider_id=None):
    bbox = bbox or [-100, -40, 18, 60]
    time_end = time_end or dt.date.today()  # default evaluated at call time, not import time
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')

    e = ERDDAP(server='NGDAC')

    # Grab every dataset available
    # datasets = pd.read_csv(e.get_search_url(response='csv', search_for='all'))

    # Search constraints
    kw = dict()
    kw['min_time'] = t0
    kw['max_time'] = t1

    if bbox:
        kw['min_lon'] = bbox[0]
        kw['max_lon'] = bbox[1]
        kw['min_lat'] = bbox[2]
        kw['max_lat'] = bbox[3]

    search = glider_id  # None searches all datasets

    search_url = e.get_search_url(search_for=search, response='csv', **kw)

    try:
        # Grab the results
        search = pd.read_csv(search_url)
    except Exception:
        # return an empty dataframe if there are no results
        return pd.DataFrame()

    # Extract the IDs
    gliders = search['Dataset ID'].values

    msg = 'Found {} Glider Datasets:\n\n{}'.format
    print(msg(len(gliders), '\n'.join(gliders)))

    # Setting constraints
    constraints = {
            'time>=': t0,
            'time<=': t1,
            'longitude>=': bbox[0],
            'longitude<=': bbox[1],
            'latitude>=': bbox[2],
            'latitude<=': bbox[3],
            }

    variables = [
            'depth',
            'latitude',
            'longitude',
            'time',
            'temperature',
            'salinity',
            ]

    e = ERDDAP(
            server='NGDAC',
            protocol='tabledap',
            response='nc'
    )

    glider_dfs = []

    for id in gliders:
        # print('Reading ' + id)
        e.dataset_id = id
        e.constraints = constraints
        e.variables = variables

        # checking data frame is not empty
        try:
            df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            ).dropna()
        except Exception:
            continue
        df = df.reset_index()
        df['dataset_id'] = id
        df = df.set_index(['dataset_id', 'time (UTC)'])
        glider_dfs.append(df)

    try:
        ndf = pd.concat(glider_dfs)
    except ValueError:
        return pd.DataFrame()

    return ndf
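# Usage sketch; the result is indexed on (dataset_id, time (UTC)), so
# per-glider tracks are easy to pull back out:
ndf = active_gliders(bbox=[-100, -40, 18, 60])
if not ndf.empty:
    for gid in ndf.index.get_level_values('dataset_id').unique():
        print(gid, len(ndf.loc[gid]))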
Example #16
def read_glider_data_erddap_Rutgers_server(url_erddap, dataset_id,
                                           lat_lim, lon_lim, scatter_plot,
                                           **kwargs):

    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    if date_ini is None or date_end is None:
        constraints = {
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }
    else:
        constraints = {
            'time>=': date_ini,
            'time<=': date_end,
            'latitude>=': lat_lim[0],
            'latitude<=': lat_lim[1],
            'longitude>=': lon_lim[0],
            'longitude<=': lon_lim[1],
        }

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')

    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Convert the glider data to a data frame,
    # checking that the data frame has data
    df = e.to_pandas()
    if len(df) != 0:

        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        ).dropna()

    dg = df['depth (m)'].values
    tg = df.index.values
    # latitude/longitude are needed for the return statement below
    latg = df['latitude (degrees_north)'].values
    long = df['longitude (degrees_east)'].values
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    upcast = np.where(np.diff(dg) < 0)[0]
    oku = np.where(np.diff(upcast) > 1)[0]
    end_upcast = upcast[oku]

    downcast = np.where(np.diff(dg) > 0)[0]
    okd = np.where(np.diff(downcast) > 1)[0]
    end_downcast = downcast[okd]

    ind = np.hstack(
        [0, np.unique(np.hstack([end_upcast, end_downcast])),
         len(dg)])
    zn = np.max(np.diff(ind))

    depthg = np.empty((zn, len(ind)))
    depthg[:] = np.nan
    timeg = np.empty((zn, len(ind)))
    timeg[:] = np.nan
    tempg = np.empty((zn, len(ind)))
    tempg[:] = np.nan
    saltg = np.empty((zn, len(ind)))
    saltg[:] = np.nan

    for i in np.arange(len(ind)):
        if i == 0:
            indd = np.argsort(dg[ind[i]:ind[i + 1] + 2])
            depthg[0:len(dg[ind[i]:ind[i + 1] + 2]),
                   i] = dg[ind[i]:ind[i + 1] + 2][indd]
            timeg[0:len(dg[ind[i]:ind[i + 1] + 2]),
                  i] = mdates.date2num(tg[ind[i]:ind[i + 1] + 2][indd])
            tempg[0:len(vg1[ind[i]:ind[i + 1] + 2]),
                  i] = vg1[ind[i]:ind[i + 1] + 2][indd]
            saltg[0:len(vg2[ind[i]:ind[i + 1] + 2]),
                  i] = vg2[ind[i]:ind[i + 1] + 2][indd]
        if i < len(ind) - 1:
            indd = np.argsort(dg[ind[i] + 1:ind[i + 1] + 2])
            depthg[0:len(dg[ind[i] + 1:ind[i + 1] + 2]),
                   i] = dg[ind[i] + 1:ind[i + 1] + 2][indd]
            timeg[0:len(dg[ind[i] + 1:ind[i + 1] + 2]),
                  i] = mdates.date2num(tg[ind[i] + 1:ind[i + 1] + 2][indd])
            tempg[0:len(vg1[ind[i] + 1:ind[i + 1] + 2]),
                  i] = vg1[ind[i] + 1:ind[i + 1] + 2][indd]
            saltg[0:len(vg2[ind[i] + 1:ind[i + 1] + 2]),
                  i] = vg2[ind[i] + 1:ind[i + 1] + 2][indd]
        else:
            indd = np.argsort(dg[ind[i] + 1:len(dg)])
            depthg[0:len(dg[ind[i] + 1:len(dg)]),
                   i] = dg[ind[i] + 1:len(dg)][indd]
            timeg[0:len(dg[ind[i] + 1:len(dg)]),
                  i] = mdates.date2num(tg[ind[i] + 1:len(dg)][indd])
            tempg[0:len(vg1[ind[i] + 1:len(vg1)]),
                  i] = vg1[ind[i] + 1:len(vg1)][indd]
            saltg[0:len(vg2[ind[i] + 1:len(vg2)]),
                  i] = vg2[ind[i] + 1:len(vg2)][indd]

    # Scatter plot
    if scatter_plot == 'yes':

        color_map = cmocean.cm.thermal
        varg = tempg
        #timeg_matrix = np.tile(timeg.T,(depthg.shape[0],1))
        ttg = np.ravel(timeg)
        dg = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
        #fig.colorbar(cs)
        ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))

        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel('Temperature ($^oC$)', fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(dg), 0])

        color_map = cmocean.cm.haline
        varg = saltg
        #timeg_matrix = np.tile(timeg.T,(depthg.shape[0],1))
        ttg = np.ravel(timeg)
        dg = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -dg, cmap=color_map, **kw)
        #fig.colorbar(cs)
        ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))

        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel('Salinity', fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(dg), 0])

    return tempg, saltg, timeg, latg, long, depthg
Example #17
from pathlib import Path
import pandas as pd
from erddapy import ERDDAP


path = Path().absolute()
fname = path.joinpath("data", "water_level_example.csv")

if fname.is_file():
    data = pd.read_csv(fname, parse_dates=["time (UTC)"])
else:
    e = ERDDAP(
        server="http://erddap.aoos.org/erddap/",
        protocol="tabledap"
    )
    e.dataset_id = "kotzebue-alaska-water-level"
    e.constraints = {
        "time>=": "2018-09-05T21:00:00Z",
        "time<=": "2019-07-10T19:00:00Z",
    }
    # NOTE: variable_name was defined upstream in the original notebook; the
    # water-level variable below is an assumed value for this dataset.
    variable_name = "sea_surface_height_above_sea_level_geoid_mllw"
    e.variables = [
        variable_name,
        "time",
        "z",
    ]
    data = e.to_pandas(
        index_col="time (UTC)",
        parse_dates=True,
    )
    data["timestamp"] = data.index.astype("int64") // 1e9
    data.to_csv(fname)
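# A follow-up sketch plotting the cached series; exact column names depend
# on the server's units suffixes, so the first data column is used:
import matplotlib.pyplot as plt

series = data.set_index("time (UTC)") if "time (UTC)" in data.columns else data
series.iloc[:, 0].plot(figsize=(11, 3))
plt.show()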
Example #18
def get_ndbc(bbox=None, time_start=None, time_end=None, buoy=None):
    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    buoy = buoy or False
    time_formatter = '%Y-%m-%dT%H:%M:%SZ'

    e = ERDDAP(
        server='CSWC',
        protocol='tabledap',
        response='csv'
    )

    e.dataset_id = 'cwwcNDBCMet'
    e.constraints = {
        'time>=': time_start.strftime(time_formatter),
        'time<=': time_end.strftime(time_formatter),
    }

    if bbox:
        e.constraints['longitude>='] = bbox[0]
        e.constraints['longitude<='] = bbox[1]
        e.constraints['latitude>='] = bbox[2]
        e.constraints['latitude<='] = bbox[3]

    e.variables = [
        "station",
        "latitude",
        "longitude",
        "time"
    ]

    if buoy:
        e.constraints['station='] = buoy

    df = e.to_pandas(
        parse_dates=['time (UTC)'],
        skiprows=(1,)  # units information can be dropped.
    ).dropna()

    stations = df.station.unique()

    # e.variables = [
    #     "station",
    #     "latitude",
    #     "longitude",
    #     "wd",
    #     "wspd",
    #     "gst",
    #     "wvht",
    #     "dpd",
    #     "apd",
    #     "mwd",
    #     "bar",
    #     "atmp",
    #     "wtmp",
    #     "dewp",
    #     # "vis",
    #     # "ptdy",
    #     # "tide",
    #     "wspu",
    #     "wspv",
    #     "time",
    # ]

    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
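# Usage sketch: the last day of NDBC stations in a box, or a single buoy
# by station ID (both values illustrative):
df = get_ndbc(bbox=[-77, -70, 35, 42])
df_one = get_ndbc(buoy='44025')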
Example #19
[wmo_mb, sat] = consulta_estacao()

x = time.gmtime(time.time() - 3600 * 24 * 100)  # 100 days ago

for i in range(len(wmo_mb)):

    print(wmo_mb[i])

    e = ERDDAP(
      server='http://osmc.noaa.gov/erddap',
      protocol='tabledap',
    )
    
    e.response = 'csv'
    e.dataset_id = 'OSMC_30day'
    e.constraints = {
        'time>=': time.strftime('%Y-%m-%dT%H:00:00Z', x),
        'longitude>=': -80.0,
        'longitude<=': 80.0,
        'platform_type=': "DRIFTING BUOYS (GENERIC)",
        'platform_code=': str(wmo_mb[i]),
    }
    e.variables = [
        'platform_code',
        'time',
        'latitude',
        'longitude',
        'sst',
        'slp',
    ]
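    # The snippet ends before the download step; a sketch of the missing
    # fetch (assumed), following the to_pandas pattern used elsewhere here:
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    )
    print(wmo_mb[i], len(df))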
Example #20
def grid_glider(
        dataset_id,
        varz2d=[
            'potential_temperature', 'salinity', 'cdom', 'chlorophyll_a',
            'beta_700nm'
        ],
        zgrid=np.arange(0, 1000, 5),
):
    '''Grid the glider data from the RUCOOL ERDDAP. This needs work.'''
    import xarray as xr
    import pandas as pd
    from erddapy import ERDDAP

    from scipy.signal import find_peaks
    from scipy import stats
    e = ERDDAP(
        server="http://slocum-data.marine.rutgers.edu/erddap",
        protocol="tabledap",
        response="nc",
    )

    # get the science data:
    e.dataset_id = dataset_id

    # this connects to the data and loads it into a pandas dataframe
    ds = e.to_pandas()
    # remove the spaces from the column names
    ds.columns = ds.columns.str.split(' ').str[0]

    # get the time to be a datetime object
    ds['time'] = pd.to_datetime(ds['time'])

    # put the times in order
    ds = ds.sort_values(by=['time'])

    # fill nans in depth for the profile breakup
    interpd = ds.depth.interpolate()

    # find the top and bottom of each profile
    apogee, prop = find_peaks(interpd.values,
                              threshold=None,
                              distance=None,
                              prominence=50)

    perogee, prop = find_peaks(-1 * interpd.values,
                               threshold=None,
                               distance=None,
                               prominence=50)

    # stack the index of the turning points into one vector
    turns = np.sort(np.append(apogee, perogee))

    # this is your depth grid, you can set:
    zgrd = zgrid

    # list of variables to grid in 2d:
    # you choose from the columns of the science data
    dataz = varz2d

    # this is a dict to hold our gridded stuff
    # until we make a dataset later
    d2 = {}

    # loop on the variables you want to bin
    for varz in dataz:
        values = ds[varz]  # grab some data

        #this thing below bins the data
        ret = stats.binned_statistic_2d(ds.index.values,
                                        ds.depth,
                                        values,
                                        statistic='mean',
                                        bins=[turns, zgrd])
        d2[varz] = ret.statistic.T

    # things to bin in the x direction
    oneDvars = ['latitude', 'longitude', 'time', 'u', 'v']

    # NB: u, v only have one value per dive sequence, so only half the number of profiles!
    # actually, it's weirder than that... not sure there are more than half...

    # dict to hold our 1d bins
    d1 = {}

    # loop on 1d stuff:
    for thing in oneDvars:
        if thing == 'time':
            bin_means, bin_edges, binnumber = stats.binned_statistic(
                ds.index.values,
                ds[thing].astype(int),
                statistic='mean',
                bins=turns)
            bin_means = pd.to_datetime(bin_means)
        else:

            bin_means, bin_edges, binnumber = stats.binned_statistic(
                ds.index.values,
                ds[thing].values,
                statistic=np.nanmean,
                bins=turns)
        d1[thing] = bin_means

    # need the depth grid centers
    zgrd_ctr = zgrd[:-1] + np.diff(zgrd).mean() / 2

    # create the dataset
    ds_gridded = xr.Dataset(coords={
        'date': d1['time'].values,
        'depth': zgrd_ctr,
        'lat': ('date', d1['latitude']),
        'lon': ('date', d1['longitude'])
    },
                            data_vars={
                                'u': ('date', d1['u']),
                                'v': ('date', d1['v'])
                            })

    # add the other data
    for varz in dataz:
        ds_gridded[varz] = (('depth', 'date'), d2[varz])

    return ds_gridded
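# Usage sketch (dataset ID illustrative; the defaults grid five science
# variables into 5 m depth bins between profile turning points):
ds_gridded = grid_glider('ru32-20190102T1317-profile-sci-rt')
print(ds_gridded)  # dims: depth x date, plus per-profile u, v, lat, lon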
Example #21
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from erddapy import ERDDAP
import pandas as pd
import seaborn as sns
sns.set(rc={'figure.figsize': (11, 4)})


e = ERDDAP(
    server='https://erddap.marine.ie/erddap',
    protocol='tabledap',
)

e.dataset_id = 'IWBNetwork'

e.constraints = {
    'time>=': '2015-06-28T00:00:00Z',
    'station_id=': 'M3'
}

e.variables = [
    'time',
    'AtmosphericPressure',
    'WindDirection',
    'WindSpeed',
    'WaveHeight',
    'WavePeriod',
    'MeanWaveDirection',
    # 'Hmax',
    # 'AirTemperature',
    'SeaTemperature',
]
Example #22
    'time>=': min_time,
    'time<=': max_time,
    'latitude>=': lat_lim[0],
    'latitude<=': lat_lim[-1],
    'longitude>=': lon_lim[0],
    'longitude<=': lon_lim[-1],
}

variables = ['time', 'latitude', 'longitude']

#%%

e = ERDDAP(server=server, protocol='tabledap', response='nc')

for id in gliders:
    e.dataset_id = id
    e.constraints = constraints
    e.variables = variables

    df = e.to_pandas(parse_dates=True)

    print(id, df.index[-1])

#%% Reading bathymetry data

ncbath = xr.open_dataset(bath_file)
bath_lat = ncbath.variables['lat'][:]
bath_lon = ncbath.variables['lon'][:]
bath_elev = ncbath.variables['elevation'][:]

oklatbath = np.logical_and(bath_lat >= lat_lim[0], bath_lat <= lat_lim[-1])
Example #23
constraints = {
    "time>=": "2016-07-10T00:00:00Z",
    "time<=": "2017-02-10T00:00:00Z",
    "latitude>=": 38.0,
    "latitude<=": 41.0,
    "longitude>=": -72.0,
    "longitude<=": -69.0,
}

from erddapy import ERDDAP

e = ERDDAP(server=server, protocol=protocol)


e.dataset_id = dataset_id
e.variables = variables
e.constraints = constraints

print(e.get_download_url())

If we change the response to `html` we can visualize the page.

def show_iframe(src):
    from IPython.display import HTML

    iframe = '<iframe src="{src}" width="100%" height="950"></iframe>'.format
    return HTML(iframe(src=src))


show_iframe(e.get_download_url(response="html"))
Example #24
datasets = to_df(url)['Dataset ID']
datasets

# This returns all of the datasets available for the Coastal Pioneer Surface Mooring. The three available nodes are:
# * BUOY (surface buoy)
# * MFN (multifunction node - on the bottom of the ocean)
# * NSIF (near-surface instrument frame - located at 7 m depth)
#
# First, let's try the CTDBP on the NSIF:

url = erd.get_search_url(search_for='"CP01CNSM NSIF CTDBP"', response='csv')

datasets = to_df(url)['Dataset ID']
datasets

erd.dataset_id = datasets[0]

# Check what variables are available on the dataset:

info_url = erd.get_info_url(response='html')
show_iframe(info_url)

info_url = erd.get_info_url(response='csv')

info_df = to_df(info_url)
info_df

info_df[info_df['Row Type'] == 'variable']

# Take a look at the variables with standard names:
Example #25
    'time>=': '2018-06-01T00:00:00Z',
    'time<=': '2018-11-30T00:00:00Z',
    'latitude>=': 15.0,
    'latitude<=': 45.0,
    'longitude>=': -100.0,
    'longitude<=': -60.0,
}

variables = ['latitude', 'longitude', 'time']

#%%

e = ERDDAP(server=server, protocol='tabledap', response='nc')

for id in gliders:
    e.dataset_id = id
    e.constraints = constraints
    e.variables = variables

    df = e.to_pandas(
        index_col='time',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

#%%
e.dataset_id = gliders[5]
e.constraints = constraints
e.variables = variables

df = e.to_pandas(
Example #26
#########################################################################################################################
"""
import os
import pandas as pd
from erddapy import ERDDAP

#https://coastwatch.pfeg.noaa.gov/erddap/info/wocecpr/index.html
### initializing the ERDDAP class instance with the data server address and the connection protocol.
e = ERDDAP(
    server='https://coastwatch.pfeg.noaa.gov/erddap',
    protocol='tabledap',
)
### specifying the data format of the response.
e.response = 'csv'
### specifying the dataset ID we need the data from.
e.dataset_id = 'wocecpr'
### specifying the data constraints
e.constraints = {
    'time>=': '2000-01-15T01:24:00Z',
    'time<=': '2010-01-17T13:39:00Z',
    'latitude>=': 37.0,
    'latitude<=': 43.43,
    'longitude>=': 317.56,
    'longitude<=': 322.87,
}
### specifying the variables (column names) to be retrieved.
e.variables = [
    'sample',
    'latitude',
    'longitude',
    'life_stage',
Example #27
- sponsor: Organization that owns and maintains the station
- Met: Total number of met messages released to the GTS
- Wave: Total number of wave messages released to the GTS

In this notebook we will explore the statistics of the messages IOOS releases to the GTS.

The first step is to download the data. We will use an ERDDAP server that [hosts the CSV files](https://ferret.pmel.noaa.gov/generic/erddap/files/ioos_obs_counts/) with the ingest data.

from datetime import date

from erddapy import ERDDAP

server = "http://osmc.noaa.gov/erddap"
e = ERDDAP(server=server, protocol="tabledap")

e.dataset_id = "ioos_obs_counts"
e.variables = ["time", "locationID", "region", "sponsor", "met", "wave"]
e.constraints = {
    "time>=": "2019-09",
    "time<": "2020-11",
}

df = e.to_pandas(parse_dates=True)

df["locationID"] = df["locationID"].str.lower()

df.tail()

The table has all the ingest data from 2019-09 through 2020-10. We can now explore it by grouping the data by IOOS Regional Association (RA).

groups = df.groupby("region")
Example #28
    #e.dataset_id = drifter_year + '_Argos_Drifters_NRT'
    #use this until we can get location quality back into older years
    #currently it is only in erddap for 2020 and newer
    #if int(drifter_years[0]) >= 2020:
    e.variables = [
        'trajectory_id', 'strain', 'voltage', 'time', 'latitude', 'sst',
        'longitude', 'location_quality'
    ]
    #else:
    #    e.variables = ['trajectory_id','strain', 'voltage', 'time', 'latitude', 'sst',
    #                   'longitude']

    e.constraints = {'trajectory_id=': argos_id}
    df_years = {}
    for year in drifter_years:
        e.dataset_id = year + '_Argos_Drifters_NRT'
        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        )
        df.columns = [c.split()[0] for c in df.columns]  # drop units from column names
        df_years[year] = df
    df = pd.concat(df_years.values())
    #get rid of timezone info
    df = df.tz_localize(None)
    # # names = ['trajectory_id','strain','voltage','datetime','latitude','sst','longitude']
    # # df=pd.read_csv(filename, skiprows=1, header=0, names=names, parse_dates=[3])
    # # #df['longitude'] = df.longitude - 360
    # df['datetime'] = df.datetime.dt.tz_localize(None) #to remove timezone info
    # df.set_index(['datetime'], inplace=True)
Example #29
    'time>=': str(tini),
    'time<=': str(tend),
    'latitude>=': lat_lim[0],
    'latitude<=': lat_lim[1],
    'longitude>=': lon_lim[0],
    'longitude<=': lon_lim[1],
}

variables = [
    'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
]

e = ERDDAP(server=url_glider, protocol='tabledap', response='nc')

#%% RU33
e.dataset_id = 'ru33-20200715T1558'
e.constraints = constraints
e.variables = variables

# checking data frame is not empty
df = e.to_pandas()
if len(df.index) != 0:

    # Converting glider data to data frame
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

    # Converting glider vectors into arrays
Example #30
hc = np.asarray(doppio.variables['hc'])

igrid = 1

#%% Reading bathymetry data

ncbath = xr.open_dataset(bath_file)
bath_lat = ncbath.variables['lat'][:]
bath_lon = ncbath.variables['lon'][:]
bath_elev = ncbath.variables['elevation'][:]

#%% Looping through all gliders found

for id in gliders:
    print('Reading ' + id)
    e.dataset_id = id
    e.constraints = constraints
    e.variables = variables

    # checking the data frame is not empty
    df = e.to_pandas()
    if len(df.index) != 0:

        # Converting glider data to data frame
        df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
                ).dropna()

        # Converting glider vectors into arrays
Example #31
def load_data_from_erddap(config, station_id=None, station_data=None):
    mcf_template = yaml.load(open(config['static_data']['mcf_template'], 'r'),
                             Loader=yaml.FullLoader)

    es = ERDDAP(
        server=config['dynamic_data']['erddap_server'],
        protocol=config['dynamic_data']['erddap_protocol'],
    )

    if station_id is None:
        #load all station data MCF skeleton
        stations = {}
        es.dataset_id = 'allDatasets'

        # filter out "log in" datasets as the vast majoirty of their available metadata is unavailable
        es.constraints = {'accessible=': 'public'}
        stations_df = es.to_pandas()

        # drop 'allDatasets' row
        stations_df.drop(labels=0, axis='index', inplace=True)
        print(stations_df)

        for index_label, row_series in stations_df.iterrows():
            id = row_series['datasetID']

            # ensure each station has an independent copy of the MCF skeleton
            stations[id] = copy.deepcopy(mcf_template)
            dataset_url = row_series['tabledap'] if row_series[
                'dataStructure'] == 'table' else row_series['griddap']

            stations[id]['metadata']['identifier'] = id
            stations[id]['metadata']['dataseturi'] = dataset_url

            stations[id]['spatial']['datatype'] = 'textTable' if row_series[
                'dataStructure'] == 'table' else 'grid'

            stations[id]['spatial']['geomtype'] = row_series['cdm_data_type']
            stations[id]['spatial']['bbox'] = '%s,%s,%s,%s' % (
                row_series['minLongitude (degrees_east)'],
                row_series['minLatitude (degrees_north)'],
                row_series['maxLongitude (degrees_east)'],
                row_series['maxLatitude (degrees_north)'])

            stations[id]['identification']['title'] = row_series['title']
            stations[id]['identification']['dates']['creation'] = row_series[
                'minTime (UTC)']
            stations[id]['identification']['temporal_begin'] = row_series[
                'minTime (UTC)']
            stations[id]['identification']['temporal_end'] = row_series[
                'maxTime (UTC)']
            stations[id]['identification']['url'] = dataset_url
            stations[id]['identification']['abstract'] = row_series['summary']

            stations[id]['distribution']['erddap']['url'] = dataset_url
            stations[id]['distribution']['erddap']['name'] = row_series[
                'title']

        print('Stations after ERDDAP call...')
        print(stations)

        return_value = stations

    else:
        #load specific station data into MCF skeleton
        print('Loading ERDDAP metadata for station: %s' % (station_id))

        es.dataset_id = station_id

        metadata_url = es.get_download_url(dataset_id='%s/index' %
                                           (station_id),
                                           response='csv',
                                           protocol='info')
        metadata = pd.read_csv(filepath_or_buffer=metadata_url)
        print(metadata_url)
        print(metadata.head())

        # ERDDAP ISO XML provides a list of dataset field names (long & short), data types & units
        # of measurement, in case this becomes useful for the CIOOS metadata standard we can extend
        # the YAML skeleton to include these and the template to export them.
        #
        # below, most variable attributes from ERDDAP are extracted and pivoted to describe the field;
        # actual field data types are extracted separately and merged into the pivoted dataframe
        # for completeness
        columns_pivot = metadata[(metadata['Variable Name'] != 'NC_GLOBAL')
                                 & (metadata['Row Type'] != 'variable')].pivot(
                                     index='Variable Name',
                                     columns='Attribute Name',
                                     values='Value')
        col_data_types = metadata[(metadata['Row Type'] == 'variable')][[
            'Variable Name', 'Data Type'
        ]]
        df_merge = pd.merge(columns_pivot, col_data_types, on='Variable Name')

        station_data['dataset'] = {}

        for index_label, field_series in df_merge.iterrows():
            field_name = field_series['Variable Name']
            station_data['dataset'][field_name] = {}
            station_data['dataset'][field_name]['long_name'] = field_series[
                'long_name']
            station_data['dataset'][field_name]['data_type'] = field_series[
                'Data Type']
            station_data['dataset'][field_name]['units'] = field_series[
                'units']

        station_data['identification']['keywords']['default'][
            'keywords'] = metadata[
                (metadata['Variable Name'] == 'NC_GLOBAL')
                & (metadata['Attribute Name'] == 'keywords')]['Value'].values

        return_value = station_data

    return return_value