def load_glider(dataset_id='ru32-20190102T1317-profile-sci-rt',
                server="http://slocum-data.marine.rutgers.edu/erddap"):
    """Load glider science data from ERDDAP and attach the water depth.

    Two tabledap datasets are read from ``server``: the science dataset
    ``dataset_id`` and the matching raw trajectory dataset, whose id is
    obtained by replacing ``'profile-sci'`` with ``'trajectory-raw'`` in
    ``dataset_id``. Both are re-indexed on time, sorted, and stripped of
    repeated time stamps.

    :param dataset_id: id of the science dataset
    :param server: base URL of the ERDDAP server; used for both requests
    :return: xarray Dataset indexed on ``time`` with an extra
        ``bottom_depth`` variable (nearest-neighbour interpolation of the
        raw ``m_water_depth`` onto the science time axis)
    """
    # TODO: cache the result with to_netcdf and reuse the file if it exists.

    def index_on_time(ds):
        # Make time (not the observation number) the dimension, sort it,
        # and keep only the first occurrence of each repeated time value.
        ds = ds.swap_dims({"obs": "time"})
        ds = ds.sortby("time")
        return ds.sel(time=~ds.indexes['time'].duplicated())

    e = ERDDAP(
        server=server,
        protocol="tabledap",
        response="nc",
    )
    e.dataset_id = dataset_id
    gds = index_on_time(e.to_xarray())

    # Second connection for the raw data. It must honour ``server`` too;
    # the previous hard-coded URL silently ignored the argument.
    e2 = ERDDAP(
        server=server,
        protocol="tabledap",
        response="nc",
    )
    e2.dataset_id = dataset_id.replace('profile-sci', 'trajectory-raw')
    e2.variables = ['time', 'm_water_depth', 'm_pitch']

    gds_raw = index_on_time(e2.to_xarray().drop_dims('trajectory'))

    # Water depths of 10 or less are treated as bad values and dropped.
    gds_raw['m_water_depth'] = gds_raw.m_water_depth.where(
        gds_raw.m_water_depth > 10, drop=True)

    gds['bottom_depth'] = gds_raw.m_water_depth.interp_like(
        gds, method='nearest')
    return gds
def load_data(self, year='2019'):
    """Download every dataset of this glider whose id contains ``year``.

    Iterates over ``self.df`` and, for each row whose ``'Dataset ID'``
    contains both ``self.glider_id`` and ``year``, requests the dataset
    from ``self.server_url`` using ``self.constraints`` and the variable
    list stored in ``self.variables`` under that dataset id.

    :param year: substring that must appear in the dataset id
    :return: ``self.dfs``, a dict mapping dataset id to DataFrame indexed
        on ``'time (UTC)'``; datasets whose request raises ``HTTPError``
        are reported and left out
    """
    self.dfs = {}
    for _, row in self.df.iterrows():
        dataset_id = row['Dataset ID']
        if self.glider_id not in dataset_id or year not in dataset_id:
            continue
        print(dataset_id)
        try:
            e = ERDDAP(
                server=self.server_url,
                protocol='tabledap',
                response='csv',
            )
            e.dataset_id = dataset_id
            e.constraints = self.constraints
            e.variables = self.variables[dataset_id]
            # The download is the step that can actually fail with an
            # HTTP error, so it has to live inside the try block.
            frame = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1, )  # units information can be dropped.
            )
        except HTTPError:
            print('Failed to generate url {}'.format(dataset_id))
            continue
        self.dfs[dataset_id] = frame
    return self.dfs
def active_drifters(bbox=None, time_start=None, time_end=None):
    """Fetch drifter records inside a bounding box and time window.

    Queries the ``gdp_interpolated_drifter`` tabledap dataset on the
    ``OSMC`` ERDDAP server; no variable subset is requested.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; defaults to
        ``[-100, -40, 18, 60]``
    :param time_start: start of the window (anything with ``strftime``);
        defaults to one day before ``time_end``
    :param time_end: end of the window; defaults to ``dt.date.today()``
    :return: DataFrame returned by ``ERDDAP.to_pandas``, or an empty
        DataFrame when that call raises ``ValueError``
    """
    if not bbox:
        bbox = [-100, -40, 18, 60]
    if not time_end:
        time_end = dt.date.today()
    if not time_start:
        time_start = time_end - dt.timedelta(days=1)

    stamp = '%Y-%m-%dT%H:%M:%SZ'

    connection = ERDDAP(server='OSMC', protocol="tabledap")
    connection.dataset_id = "gdp_interpolated_drifter"
    connection.constraints = {
        "time>=": time_start.strftime(stamp),
        "time<=": time_end.strftime(stamp),
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    try:
        return connection.to_pandas()
    except ValueError:
        return pd.DataFrame()
def retrieve_variable_names_erddap_server(url_erddap, dataset_id):
    """List the columns an ERDDAP tabledap dataset returns.

    Original author: aristizabal (created Tue Nov 3 11:26:05 2020).

    The whole dataset is downloaded with ``ERDDAP.to_pandas`` and the
    column labels of the resulting DataFrame are printed and returned.

    Inputs:
    url_erddap: url address of the erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'

    Outputs:
    variable_names: list with the DataFrame column labels
    """
    from erddapy import ERDDAP

    server = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    server.dataset_id = dataset_id

    variable_names = list(server.to_pandas().columns)

    print('List of available variables ')
    print(variable_names)
    return variable_names
def get_erddap_dataset(server, protocol, file_type, ds_id, var_list=None):
    """Return an ERDDAP dataset as an xarray Dataset sorted by time.

    :param server: ERDDAP server URL (or erddapy server shortcut)
    :param protocol: ERDDAP protocol passed to ``ERDDAP``
    :param file_type: response type passed to ``ERDDAP``
    :param ds_id: dataset id
    :param var_list: optional list of variables to request; when empty or
        None no variable subset is set
    :return: xarray Dataset sorted along its ``time`` variable
    """
    connection = ERDDAP(server=server, protocol=protocol, response=file_type)
    connection.dataset_id = ds_id
    if var_list:
        connection.variables = var_list

    dataset = connection.to_xarray()
    return dataset.sortby(dataset.time)
def active_argo_floats(bbox=None, time_start=None, time_end=None, floats=None):
    """Fetch Argo float records inside a bounding box and time window.

    Queries the ``ArgoFloats`` tabledap dataset on the ``IFREMER`` ERDDAP
    server for platform number, time, pressure, position, temperature and
    salinity, and drops every row that contains a missing value.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; defaults to
        ``[-100, -45, 5, 46]``
    :param time_start: time to start looking for floats; defaults to one
        day before ``time_end``
    :param time_end: time to end looking for floats; defaults to
        ``dt.date.today()``
    :param floats: optional value for a ``platform_number=`` constraint
    :return: DataFrame of matching records, or an empty DataFrame when the
        request raises ``HTTPError``
    """
    if not bbox:
        bbox = [-100, -45, 5, 46]
    if not time_end:
        time_end = dt.date.today()
    if not time_start:
        time_start = time_end - dt.timedelta(days=1)

    constraints = {
        'time>=': str(time_start),
        'time<=': str(time_end),
    }
    # bbox is always populated here (either the argument or the default).
    constraints.update({
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    })
    if floats:
        constraints['platform_number='] = floats

    connection = ERDDAP(server='IFREMER', protocol='tabledap', response='nc')
    connection.dataset_id = 'ArgoFloats'
    connection.constraints = constraints
    connection.variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    try:
        frame = connection.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        )
        return frame.dropna()
    except HTTPError:
        return pd.DataFrame()
def get_erddap_dataset(server, ds_id, variables=None, constraints=None):
    """Return a tabledap dataset as an xarray Dataset sorted by time.

    :param server: ERDDAP server URL (or erddapy server shortcut)
    :param ds_id: dataset id
    :param variables: optional list of variables to request
    :param constraints: optional constraints mapping for the request
    :return: xarray Dataset sorted along its ``time`` variable
    """
    connection = ERDDAP(server=server, protocol='tabledap', response='nc')
    connection.dataset_id = ds_id

    # Empty or missing filters are simply not applied.
    if constraints:
        connection.constraints = constraints
    if variables:
        connection.variables = variables

    dataset = connection.to_xarray()
    return dataset.sortby(dataset.time)
def test_erddap_requests_kwargs():
    """Check that ``requests_kwargs`` set on an ERDDAP instance is honoured.

    The server URL is routed through a delay service that waits one second
    longer than the configured request timeout, so ``to_xarray`` is
    expected to raise ``ReadTimeout``.
    """
    base_url = "http://www.neracoos.org/erddap"
    timeout_seconds = 1  # request timeout in seconds
    delay_milliseconds = (timeout_seconds + 1) * 1000

    slowwly_url = "".join([
        "http://slowwly.robertomurray.co.uk/delay/",
        str(delay_milliseconds),
        "/url/",
        base_url,
    ])

    connection = ERDDAP(slowwly_url)
    connection.dataset_id = "M01_sbe37_all"
    connection.protocol = "tabledap"
    connection.requests_kwargs["timeout"] = timeout_seconds

    with pytest.raises(ReadTimeout):
        connection.to_xarray()
def check_dataset_empty(url_erddap, dataset_id, date_ini, date_end, lon_lim, lat_lim):
    """Tell whether a constrained glider query returns (almost) no data.

    Requests depth, latitude, longitude, time, temperature and salinity
    from ``dataset_id`` on ``url_erddap``, limited to the given time
    window and latitude/longitude limits.

    :param url_erddap: url address of the erddap server
    :param dataset_id: dataset id
    :param date_ini: value for the ``time>=`` constraint
    :param date_end: value for the ``time<=`` constraint
    :param lon_lim: ``[lon_min, lon_max]``
    :param lat_lim: ``[lat_min, lat_max]``
    :return: True when the resulting DataFrame has fewer than 4 rows
    """
    from erddapy import ERDDAP

    connection = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    connection.dataset_id = dataset_id
    connection.constraints = {
        'time>=': date_ini,
        'time<=': date_end,
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }
    connection.variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    # A frame with fewer than four rows is considered empty.
    frame = connection.to_pandas()
    return len(frame) < 4
def get_erddap_dataset(ds_id, variables=None, constraints=None, filetype=None):
    """
    Returns a netcdf dataset for a specified dataset ID (or dataframe if
    dataset cannot be converted to xarray)

    :param ds_id: dataset ID e.g. ng314-20200806T2040
    :param variables: optional list of variables
    :param constraints: optional constraints for the request
    :param filetype: optional filetype to return, 'nc' (default) or
        'dataframe'
    :return: for 'nc', an xarray dataset sorted by time; an empty list if
        the request raises OSError; a dataframe (rows with missing values
        dropped) if the conversion raises TypeError. For 'dataframe', a
        dataframe with rows with missing values dropped.
    :raises ValueError: if filetype is neither 'nc' nor 'dataframe'
    """
    filetype = filetype or 'nc'
    # Reject a bad filetype up front. Previously this case printed a
    # message and then failed with UnboundLocalError at the return.
    if filetype not in ('nc', 'dataframe'):
        raise ValueError(
            'Unrecognized filetype: {}. Needs to be "nc" or "dataframe"'.
            format(filetype))

    e = ERDDAP(server='NGDAC', protocol='tabledap', response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables

    if filetype == 'dataframe':
        return e.to_pandas().dropna()

    try:
        ds = e.to_xarray()
        ds = ds.sortby(ds.time)
    except OSError:
        print('No dataset available for specified constraints: {}'.format(
            ds_id))
        ds = []
    except TypeError:
        print('Cannot convert to xarray, providing dataframe: {}'.format(
            ds_id))
        ds = e.to_pandas().dropna()
    return ds
def load_data(self, year='2019'):
    """Download every dataset of this glider whose id contains ``year``.

    Iterates over ``self.df`` and, for each row whose ``'Dataset ID'``
    contains both ``self.glider_id`` and ``year``, requests the dataset
    from ``self.server_url`` using ``self.constraints`` and the variable
    list stored in ``self.variables`` under that dataset id.

    :param year: substring that must appear in the dataset id
    :return: ``self.dfs``, a dict mapping dataset id to DataFrame indexed
        on ``'time (UTC)'``; datasets whose request raises ``HTTPError``
        are reported and left out
    """
    self.dfs = {}
    for _, row in self.df.iterrows():
        dataset_id = row['Dataset ID']
        if self.glider_id not in dataset_id or year not in dataset_id:
            continue
        print(dataset_id)
        try:
            e = ERDDAP(server=self.server_url,
                       protocol='tabledap',
                       response='csv',
                       )
            e.dataset_id = dataset_id
            e.constraints = self.constraints
            e.variables = self.variables[dataset_id]
            # The download is the step that can actually fail with an
            # HTTP error, so it has to live inside the try block.
            frame = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            )
        except HTTPError:
            print('Failed to generate url {}'.format(dataset_id))
            continue
        self.dfs[dataset_id] = frame
    return self.dfs
def get_erddap_data(dataset_id):
    '''
    Download a fixed set of variables for one deployment from the IOOS
    glider ERDDAP server (https://gliders.ioos.us/erddap).

    :param dataset_id: the deployment name example:'ce_311-20200708T1723'
    :return: pandas DataFrame with deployment variable values
    '''
    requested = [
        'depth',
        'latitude',
        'longitude',
        'salinity',
        'temperature',
        'conductivity',
        'density',
        'time',
    ]

    connection = ERDDAP(
        server='https://gliders.ioos.us/erddap',
        protocol='tabledap',
    )
    connection.response = 'csv'
    connection.dataset_id = dataset_id
    connection.variables = requested

    return connection.to_pandas()
def read_glider_variables_erddap_server(url_erddap, dataset_id,
                                        lat_lim, lon_lim,
                                        variable_names=['time'],
                                        **kwargs):
    """
    Original author: aristizabal (created Tue Nov 3 11:26:05 2020).

    Read selected glider variables from an ERDDAP glider server (written
    for the IOOS and Rutgers servers).

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'
    lat_lim: latitude limits for the search.
            Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
            Example, lon_lim = [-75.0,-72.0]
    variable_names: list of variable names.
                    Example: variable_names = ['depth', 'latitude',
                    'longitude', 'time', 'temperature', 'salinity']
                    The default value is variable_names=['time']
    date_ini (keyword): initial date of the time window, e.g.
              '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end (keyword): final date of the time window, e.g.
              '2018-08-10T00:00:00Z' or '2018/08/10/00'
    The time constraints are applied only when BOTH date_ini and date_end
    are given; otherwise only the latitude/longitude limits are used.

    Outputs:
    df: Pandas data frame with all the variables requested as vectors
    """
    from erddapy import ERDDAP

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Time constraints come first so the request matches the original
    # constraint order.
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints.update({
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    })

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    # Pass a copy so the shared default list can never be modified through
    # the ERDDAP object.
    e.variables = list(variable_names)

    # One download is enough: without an index column, parse_dates=True
    # has nothing to parse, so the former second request (issued when the
    # frame had more than 3 rows) fetched the same table again.
    df = e.to_pandas(parse_dates=True)

    return df
def read_glider_data_erddap_server(url_erddap, dataset_id,
                                   lat_lim, lon_lim, scatter_plot, **kwargs):
    """
    Original author: aristizabal (created Tue Feb 5 10:05:37 2019).

    Read glider depth, position, temperature and salinity from an ERDDAP
    server and arrange them as one column per unique time stamp.

    Inputs:
    url_erddap: url address of the erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ru30-20180705T1825'
    lat_lim: latitude limits for the search.
            Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
            Example, lon_lim = [-75.0,-72.0]
    scatter_plot: if equal to 'yes' then scatter plots of the temperature
                  and salinity transects are drawn
    date_ini (keyword): initial date of the time window, e.g.
              '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end (keyword): final date of the time window, e.g.
              '2018-08-10T00:00:00Z' or '2018/08/10/00'
    The time constraints are applied only when BOTH dates are given.

    Outputs:
    tempg: 2-D array (samples x unique times) built from the 4th data
           column, NaN padded
    saltg: 2-D array (samples x unique times) built from the 5th data
           column, NaN padded
    timeg: unique time stamps
    latg: latitude at the first sample of each unique time
    long: longitude at the first sample of each unique time
    depthg: 2-D array (samples x unique times) of depth, NaN padded
    All six outputs are np.nan when the query returns 3 rows or fewer, or
    when no complete rows remain after dropping missing values.
    """
    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Time constraints come first so the request matches the original
    # constraint order.
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints.update({
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    })

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    no_data = (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan)

    # First request only checks that the data frame has data.
    df = e.to_pandas()
    if len(df) <= 3:
        return no_data

    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()
    if len(df) == 0:
        # Every row had a missing value; nothing left to grid.
        return no_data

    # Converting glider vectors into arrays
    timeg, ind = np.unique(df.index.values, return_index=True)
    latg = df['latitude (degrees_north)'].values[ind]
    long = df['longitude (degrees_east)'].values[ind]
    dg = df['depth (m)'].values
    # NOTE(review): columns 3 and 4 are presumably temperature and
    # salinity given the requested variable order; confirm against the
    # server response.
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    # Column i holds the samples from the first occurrence of time i up to
    # the first occurrence of the next time (or the end of the record).
    edges = np.hstack([ind, len(dg)])
    # Built-in int: the np.int alias was removed from NumPy.
    zn = int(np.max(np.diff(edges)))

    depthg = np.full((zn, len(timeg)), np.nan)
    tempg = np.full((zn, len(timeg)), np.nan)
    saltg = np.full((zn, len(timeg)), np.nan)

    for i in range(len(ind)):
        start, stop = edges[i], edges[i + 1]
        depth_chunk = dg[start:stop]
        temp_chunk = vg1[start:stop]
        salt_chunk = vg2[start:stop]
        depthg[0:len(depth_chunk), i] = depth_chunk
        tempg[0:len(temp_chunk), i] = temp_chunk
        saltg[0:len(salt_chunk), i] = salt_chunk

    # Scatter plots: one figure per variable, same layout for both.
    if scatter_plot == 'yes':
        timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
        ttg = np.ravel(timeg_matrix)
        depth_flat = np.ravel(depthg)
        panels = [
            (tempg, cmocean.cm.thermal, 'Temperature ($^oC$)'),
            (saltg, cmocean.cm.haline, 'Salinity'),
        ]
        for varg, color_map, label in panels:
            kw = dict(c=np.ravel(varg), marker='*', edgecolor='none')

            fig, ax = plt.subplots(figsize=(10, 3))
            cs = ax.scatter(ttg, -depth_flat, cmap=color_map, **kw)
            ax.set_xlim(timeg[0], timeg[-1])

            ax.set_ylabel('Depth (m)', fontsize=14)
            cbar = plt.colorbar(cs)
            cbar.ax.set_ylabel(label, fontsize=14)
            ax.set_title(dataset_id, fontsize=16)
            xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
            ax.xaxis.set_major_formatter(xfmt)
            plt.ylim([-np.nanmax(depth_flat), 0])

    return tempg, saltg, timeg, latg, long, depthg
def active_gliders(bbox=None, time_start=None, time_end=None, glider_id=None):
    """Collect data from every NGDAC glider dataset found in a window.

    The ``NGDAC`` ERDDAP server is searched for datasets inside the
    bounding box and time window (optionally matching ``glider_id``); for
    each hit depth, latitude, longitude, time, temperature and salinity
    are downloaded with the same constraints.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; defaults to
        ``[-100, -40, 18, 60]``
    :param time_start: start of the window (anything with ``strftime``);
        defaults to one day before ``time_end``
    :param time_end: end of the window; defaults to ``dt.date.today()``
        evaluated at call time
    :param glider_id: optional search string for the dataset search
    :return: DataFrame indexed on ``('dataset_id', 'time (UTC)')``, or an
        empty DataFrame when the search fails or no dataset yields data
    """
    bbox = bbox or [-100, -40, 18, 60]
    # Resolve "today" per call. As a default argument it was evaluated
    # once at import time and went stale in long-running processes.
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')

    e = ERDDAP(server='NGDAC')

    # Search constraints
    kw = {
        'min_time': t0,
        'max_time': t1,
        'min_lon': bbox[0],
        'max_lon': bbox[1],
        'min_lat': bbox[2],
        'max_lat': bbox[3],
    }

    search_url = e.get_search_url(
        search_for=glider_id or None, response='csv', **kw)

    try:
        # Grab the results
        search = pd.read_csv(search_url)
    except Exception:
        # Best effort: a failed search (e.g. no results) yields an empty
        # dataframe. KeyboardInterrupt/SystemExit are no longer swallowed.
        return pd.DataFrame()

    # Extract the IDs
    gliders = search['Dataset ID'].values

    msg = 'Found {} Glider Datasets:\n\n{}'.format
    print(msg(len(gliders), '\n'.join(gliders)))

    # Setting constraints
    constraints = {
        'time>=': t0,
        'time<=': t1,
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    variables = [
        'depth',
        'latitude',
        'longitude',
        'time',
        'temperature',
        'salinity',
    ]

    e = ERDDAP(
        server='NGDAC',
        protocol='tabledap',
        response='nc'
    )

    glider_dfs = []

    for dataset_id in gliders:
        e.dataset_id = dataset_id
        e.constraints = constraints
        e.variables = variables

        try:
            df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            ).dropna()
        except Exception:
            # Datasets that cannot be downloaded or parsed are skipped.
            continue
        df = df.reset_index()
        df['dataset_id'] = dataset_id
        df = df.set_index(['dataset_id', 'time (UTC)'])
        glider_dfs.append(df)

    try:
        ndf = pd.concat(glider_dfs)
    except ValueError:
        # pd.concat refuses an empty list: nothing was downloaded.
        return pd.DataFrame()

    return ndf
def read_glider_data_erddap_Rutgers_server(url_erddap, dataset_id,
                                           lat_lim, lon_lim, scatter_plot,
                                           **kwargs):
    """Read glider data from an ERDDAP server and grid it by depth segment.

    The depth record is cut at its turning points (the last sample of each
    run of decreasing or increasing depth). Each segment becomes one
    column of the output arrays, sorted by increasing depth and padded
    with NaN.

    Inputs:
    url_erddap: url address of the erddap server
    dataset_id: dataset id on that server
    lat_lim: latitude limits for the search, e.g. [38.0,40.0]
    lon_lim: longitude limits for the search, e.g. [-75.0,-72.0]
    scatter_plot: if equal to 'yes' then scatter plots of the temperature
                  and salinity transects are drawn
    date_ini, date_end (keywords): time window; the time constraints are
              applied only when BOTH are given

    Outputs (all 2-D, samples x segments, unless stated otherwise):
    tempg: values of the 4th data column
    saltg: values of the 5th data column
    timeg: sample times as matplotlib date numbers
    latg: latitude of each sample
    long: longitude of each sample
    depthg: depth of each sample
    The last column is always all-NaN, because the final boundary is the
    end of the record. All six outputs are np.nan when the query returns
    no rows or no complete rows.

    NOTE(review): latg and long were returned but never computed, so the
    function always raised NameError. Gridding them like the other
    variables is an assumption; confirm the shape callers expect.
    """
    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Time constraints come first so the request matches the original
    # constraint order.
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints.update({
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    })

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    no_data = (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan)

    # First request only checks that the data frame has data.
    df = e.to_pandas()
    if len(df) == 0:
        return no_data

    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()
    if len(df) == 0:
        return no_data

    dg = df['depth (m)'].values
    tg = mdates.date2num(df.index.values)
    lat = df['latitude (degrees_north)'].values
    lon = df['longitude (degrees_east)'].values
    # NOTE(review): columns 3 and 4 are presumably temperature and
    # salinity given the requested variable order; confirm against the
    # server response.
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    # Index of the last sample of every run of decreasing/increasing depth.
    upcast = np.where(np.diff(dg) < 0)[0]
    end_upcast = upcast[np.where(np.diff(upcast) > 1)[0]]

    downcast = np.where(np.diff(dg) > 0)[0]
    end_downcast = downcast[np.where(np.diff(downcast) > 1)[0]]

    ind = np.hstack(
        [0, np.unique(np.hstack([end_upcast, end_downcast])),
         len(dg)])

    # One slice per column. The first segment starts at sample 0, every
    # other one starts one sample after its boundary. if/elif/else keeps
    # the first segment from being overwritten by the generic branch.
    segments = []
    for i in range(len(ind)):
        if i == 0:
            segments.append(slice(ind[0], ind[1] + 2))
        elif i < len(ind) - 1:
            segments.append(slice(ind[i] + 1, ind[i + 1] + 2))
        else:
            segments.append(slice(ind[i] + 1, len(dg)))

    # Depth-sorting order of each segment, shared by all variables.
    orders = [np.argsort(dg[seg]) for seg in segments]
    # Size the arrays from the real segment lengths. max(diff(ind)) was
    # too small for the slices above and made the assignments fail.
    zn = max(len(order) for order in orders)

    def grid(values):
        # Spread a per-sample vector over the (zn, segments) grid.
        out = np.full((zn, len(ind)), np.nan)
        for col, (seg, order) in enumerate(zip(segments, orders)):
            out[0:len(order), col] = values[seg][order]
        return out

    depthg = grid(dg)
    timeg = grid(tg)
    tempg = grid(vg1)
    saltg = grid(vg2)
    latg = grid(lat)
    long = grid(lon)

    # Scatter plots: one figure per variable, same layout for both.
    if scatter_plot == 'yes':
        ttg = np.ravel(timeg)
        depth_flat = np.ravel(depthg)
        panels = [
            (tempg, cmocean.cm.thermal, 'Temperature ($^oC$)'),
            (saltg, cmocean.cm.haline, 'Salinity'),
        ]
        for varg, color_map, label in panels:
            kw = dict(c=np.ravel(varg), marker='*', edgecolor='none')

            fig, ax = plt.subplots(figsize=(10, 3))
            cs = ax.scatter(ttg, -depth_flat, cmap=color_map, **kw)
            ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))

            ax.set_ylabel('Depth (m)', fontsize=14)
            cbar = plt.colorbar(cs)
            cbar.ax.set_ylabel(label, fontsize=14)
            ax.set_title(dataset_id, fontsize=16)
            xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
            ax.xaxis.set_major_formatter(xfmt)
            plt.ylim([-np.nanmax(depth_flat), 0])

    return tempg, saltg, timeg, latg, long, depthg
from pathlib import Path

import pandas as pd
from erddapy import ERDDAP

# Load the Kotzebue water-level example from a local CSV cache, or download
# it from the AOOS ERDDAP server and create the cache.
path = Path().absolute()
fname = path.joinpath("data", "water_level_example.csv")

if fname.is_file():
    data = pd.read_csv(fname, parse_dates=["time (UTC)"])
else:
    e = ERDDAP(
        server="http://erddap.aoos.org/erddap/",
        protocol="tabledap"
    )
    e.dataset_id = "kotzebue-alaska-water-level"
    e.constraints = {
        "time>=": "2018-09-05T21:00:00Z",
        "time<=": "2019-07-10T19:00:00Z",
    }
    # NOTE(review): variable_name is not defined in this snippet; it has to
    # be set before this point.
    e.variables = [
        variable_name,
        "time",
        "z",
    ]
    data = e.to_pandas(
        index_col="time (UTC)",
        parse_dates=True,
    )
    # Index cast to int64 and floor-divided by 1e9 (nanoseconds to
    # seconds, assuming a nanosecond-resolution datetime index).
    data["timestamp"] = data.index.astype("int64") // 1e9
    # The cache directory may not exist yet on a fresh checkout.
    fname.parent.mkdir(parents=True, exist_ok=True)
    data.to_csv(fname)
def get_ndbc(bbox=None, time_start=None, time_end=None, buoy=None):
    """Fetch NDBC station positions inside a bounding box and time window.

    Queries the ``cwwcNDBCMet`` tabledap dataset on the ``CSWC`` ERDDAP
    server for station, latitude, longitude and time, and drops rows that
    contain missing values.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; defaults to
        ``[-100, -45, 5, 46]``
    :param time_start: start of the window (anything with ``strftime``);
        defaults to one day before ``time_end``
    :param time_end: end of the window; defaults to ``dt.date.today()``
    :param buoy: optional value for a ``station=`` constraint
    :return: DataFrame of matching records, or an empty DataFrame when the
        request raises ``HTTPError``
    """
    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))

    time_formatter = '%Y-%m-%dT%H:%M:%SZ'

    e = ERDDAP(
        server='CSWC',
        protocol='tabledap',
        response='csv'
    )
    e.dataset_id = 'cwwcNDBCMet'

    constraints = {
        'time>=': time_start.strftime(time_formatter),
        'time<=': time_end.strftime(time_formatter),
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }
    if buoy:
        constraints['station='] = buoy
    e.constraints = constraints

    # NOTE: a wider list of met variables (wd, wspd, gst, wvht, dpd, apd,
    # mwd, bar, atmp, wtmp, dewp, wspu, wspv) used to be requested in a
    # second step; it is currently disabled.
    e.variables = [
        "station",
        "latitude",
        "longitude",
        "time"
    ]

    # Single guarded request. The earlier version issued the same request
    # twice, the first time outside the try block, so an HTTP error
    # escaped before the handler was reached.
    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
wmo_mb, sat = consulta_estacao()

# Start of the query window: 100 days before now, as a UTC struct_time.
x = time.gmtime(time.time() - 3600 * 24 * 100)

# Configure one OSMC_30day tabledap query per platform code.
for i, platform_code in enumerate(wmo_mb):
    print(platform_code)
    e = ERDDAP(
        server='http://osmc.noaa.gov/erddap',
        protocol='tabledap',
    )
    e.response = 'csv'
    e.dataset_id = 'OSMC_30day'
    e.constraints = {
        # Hour resolution: minutes and seconds are forced to zero.
        'time>=': '{}-{:02d}-{:02d}T{:02d}:00:00Z'.format(
            x.tm_year, x.tm_mon, x.tm_mday, x.tm_hour),
        'longitude>=': -80.0,
        'longitude<=': 80.0,
        'platform_type=': "DRIFTING BUOYS (GENERIC)",
        'platform_code=': str(platform_code),
    }
    e.variables = [
        'platform_code',
        'time',
        'latitude',
        'longitude',
        'sst',
        'slp',
    ]
def grid_glider(
    dataset_id,
    varz2d=[
        'potential_temperature', 'salinity', 'cdom', 'chlorophyll_a',
        'beta_700nm'
    ],
    zgrid=np.arange(0, 1000, 5),
):
    '''Grid glider data from the RUCOOL ERDDAP server. This needs work.

    The depth record is split at its turning points (peaks and troughs
    with a prominence of at least 50, found with scipy find_peaks). The
    samples between consecutive turning points form one bin along the
    ``date`` axis, and ``zgrid`` gives the bin edges along depth.

    :param dataset_id: dataset id on the Rutgers slocum-data ERDDAP server
    :param varz2d: columns of the science data to bin in 2-D (mean per
        bin); the default list is only read, never modified
    :param zgrid: depth bin edges
    :return: xarray Dataset with coords ``date`` (mean time of each bin),
        ``depth`` (bin centres), ``lat`` and ``lon``; 1-D variables ``u``
        and ``v``; and one ``(depth, date)`` variable per entry of
        ``varz2d``
    '''
    import xarray as xr
    import pandas as pd
    from erddapy import ERDDAP
    from scipy.signal import find_peaks
    from scipy import stats

    e = ERDDAP(
        server="http://slocum-data.marine.rutgers.edu/erddap",
        protocol="tabledap",
        response="nc",
    )

    # get the science data:
    e.dataset_id = dataset_id

    # this connects to the data and loads it into a pandas dataframe
    ds = e.to_pandas()

    # keep only the first word of each column name (drops the units part)
    ds.columns = ds.columns.str.split(' ').str[0]

    # get the time to be a datetime object
    ds['time'] = pd.to_datetime(ds['time'])

    # Put the times in order AND renumber the rows. The turning points
    # below are positions in the sorted record, so the row labels used for
    # binning have to be positional as well.
    ds = ds.sort_values(by=['time']).reset_index(drop=True)

    # fill nans in depth for the profile breakup
    interpd = ds.depth.interpolate()

    # find the top and bottom of each profile
    apogee, _ = find_peaks(interpd.values,
                           threshold=None,
                           distance=None,
                           prominence=50)

    perogee, _ = find_peaks(-1 * interpd.values,
                            threshold=None,
                            distance=None,
                            prominence=50)

    # stack the index of the turning points into one vector
    turns = np.sort(np.append(apogee, perogee))

    rows = ds.index.values

    # 2-D binning: mean of every requested variable per (segment, depth)
    d2 = {}
    for varz in varz2d:
        ret = stats.binned_statistic_2d(rows,
                                        ds.depth,
                                        ds[varz],
                                        statistic='mean',
                                        bins=[turns, zgrid])
        d2[varz] = ret.statistic.T

    # things to bin in the x direction
    oneDvars = ['latitude', 'longitude', 'time', 'u', 'v']
    # NB: u, v only have one value per dive sequence, so only half the
    # number of profiles (possibly fewer; this is not well understood).

    # 1-D binning: one value per segment
    d1 = {}
    for thing in oneDvars:
        if thing == 'time':
            # Average the times as 64-bit integers. An explicit 'int64'
            # avoids the platform-dependent width of plain int.
            bin_means, bin_edges, binnumber = stats.binned_statistic(
                rows,
                ds[thing].astype('int64'),
                statistic='mean',
                bins=turns)
            bin_means = pd.to_datetime(bin_means)
        else:
            bin_means, bin_edges, binnumber = stats.binned_statistic(
                rows,
                ds[thing].values,
                statistic=np.nanmean,
                bins=turns)

        d1[thing] = bin_means

    # depth grid centres: lower edge plus half the mean bin width
    zgrd_ctr = zgrid[:-1] + np.diff(zgrid).mean() / 2

    # create the dataset
    ds_gridded = xr.Dataset(coords={
        'date': d1['time'].values,
        'depth': zgrd_ctr,
        'lat': ('date', d1['latitude']),
        'lon': ('date', d1['longitude'])
    },
                            data_vars={
                                'u': ('date', d1['u']),
                                'v': ('date', d1['v'])
                            })

    # add the other data
    for varz in varz2d:
        ds_gridded[varz] = (('depth', 'date'), d2[varz])

    return ds_gridded
import matplotlib.dates as mdates import matplotlib.pyplot as plt from erddapy import ERDDAP import pandas as pd import seaborn as sns sns.set(rc={'figure.figsize': (11, 4)}) e = ERDDAP( server='https://erddap.marine.ie/erddap', protocol='tabledap', ) e.dataset_id = 'IWBNetwork' e.constraints = { 'time>=': '2015-06-28T00:00:00Z', 'station_id=': 'M3' } e.variables = [ 'time', 'AtmosphericPressure', 'WindDirection', 'WindSpeed', 'WaveHeight', 'WavePeriod', 'MeanWaveDirection', # 'Hmax', # 'AirTemperature', 'SeaTemperature'
'time>=': min_time, 'time<=': max_time, 'latitude>=': lat_lim[0], 'latitude<=': lat_lim[-1], 'longitude>=': lon_lim[0], 'longitude<=': lon_lim[-1], } variables = ['time', 'latitude', 'longitude'] #%% e = ERDDAP(server=server, protocol='tabledap', response='nc') for id in gliders: e.dataset_id = id e.constraints = constraints e.variables = variables df = e.to_pandas(parse_dates=True) print(id, df.index[-1]) #%% Reading bathymetry data ncbath = xr.open_dataset(bath_file) bath_lat = ncbath.variables['lat'][:] bath_lon = ncbath.variables['lon'][:] bath_elev = ncbath.variables['elevation'][:] oklatbath = np.logical_and(bath_lat >= lat_lim[0], bath_lat <= lat_lim[-1])
constraints = { "time>=": "2016-07-10T00:00:00Z", "time<=": "2017-02-10T00:00:00Z", "latitude>=": 38.0, "latitude<=": 41.0, "longitude>=": -72.0, "longitude<=": -69.0, } from erddapy import ERDDAP e = ERDDAP(server=server, protocol=protocol,) e.dataset_id = dataset_id e.variables = variables e.constraints = constraints print(e.get_download_url()) If we change the response to `html` we can visualize the page. def show_iframe(src): from IPython.display import HTML iframe = '<iframe src="{src}" width="100%" height="950"></iframe>'.format return HTML(iframe(src=src)) show_iframe(e.get_download_url(response="html"))
datasets = to_df(url)['Dataset ID'] datasets # This returns all of the datasets available for the Coastal Pioneer Surface Mooring. The three available nodes are: # * BUOY (surface buoy) # * MFN (multifunction node - on the bottom of the ocean) # * NSIF (near-surface instrument frame - located at 7 m depth) # # First, lets try the CTDBP on the NSIF: url = erd.get_search_url(search_for='"CP01CNSM NSIF CTDBP"', response='csv') datasets = to_df(url)['Dataset ID'] datasets erd.dataset_id = datasets[0] # Check what variables are available on the dataset: info_url = erd.get_info_url(response='html') show_iframe(info_url) info_url = erd.get_info_url(response='csv') info_df = to_df(info_url) info_df info_df[info_df['Row Type'] == 'variable'] # Take a look at the variables with standard names:
'time>=': '2018-06-01T00:00:00Z', 'time<=': '2018-11-30T00:00:00Z', 'latitude>=': 15.0, 'latitude<=': 45.0, 'longitude>=': -100.0, 'longitude<=': -60.0, } variables = ['latitude', 'longitude', 'time'] #%% e = ERDDAP(server=server, protocol='tabledap', response='nc') for id in gliders: e.dataset_id = id e.constraints = constraints e.variables = variables df = e.to_pandas( index_col='time', parse_dates=True, skiprows=(1, ) # units information can be dropped. ).dropna() #%% e.dataset_id = gliders[5] e.constraints = constraints e.variables = variables df = e.to_pandas(
######################################################################################################################### """ import os import pandas as pd from erddapy import ERDDAP #https://coastwatch.pfeg.noaa.gov/erddap/info/wocecpr/index.html ### initializing the erddap class instance with the data server address and the connection protocol. e = ERDDAP( server='https://coastwatch.pfeg.noaa.gov/erddap', protocol='tabledap', ) ### specifying the data format of the response. e.response = 'csv' ### specifying the database name we need the data from. e.dataset_id = 'wocecpr' ### specifying the data constraints e.constraints = { 'time>=': '2000-01-15T01:24:00Z', 'time<=': '2010-01-17T13:39:00Z', 'latitude>=': 37.0, 'latitude<=': 43.43, 'longitude>=': 317.56, 'longitude<=': 322.87, } ###specifying the variables(columns name) to be retrived. e.variables = [ 'sample', 'latitude', 'longitude', 'life_stage',
- sponsor: Organization that owns and maintains the station; - Met: Total number of met messages released to the GTS - Wave: Total number of wave messages released to the GTS In this notebook we will explore the statistics of the messages IOOS is releasing to GTS. The first step is to download the data. We will use an ERDDAP server that [hosts the CSV files](https://ferret.pmel.noaa.gov/generic/erddap/files/ioos_obs_counts/) with the ingest data. from datetime import date from erddapy import ERDDAP server = "http://osmc.noaa.gov/erddap" e = ERDDAP(server=server, protocol="tabledap") e.dataset_id = "ioos_obs_counts" e.variables = ["time", "locationID", "region", "sponsor", "met", "wave"] e.constraints = { "time>=": "2019-09", "time<": "2020-11", } df = e.to_pandas(parse_dates=True) df["locationID"] = df["locationID"].str.lower() df.tail() The table has all the ingest data from 2019-01-01 to 2020-06-01. We can now explore it grouping the data by IOOS Regional Association (RA). groups = df.groupby("region")
# Download the Argos drifter records for a single trajectory from ERDDAP,
# issuing one request per yearly dataset, then combine the yearly frames into
# one time-indexed DataFrame without timezone information.

# e.dataset_id = drifter_year + '_Argos_Drifters_NRT'
# use this until we can get location quality back into older years
# currently it is only in erddap for 2020 and newer
# if int(drifter_years[0]) >= 2020:
e.variables = [
    'trajectory_id',
    'strain',
    'voltage',
    'time',
    'latitude',
    'sst',
    'longitude',
    'location_quality',
]
# else:
#     e.variables = ['trajectory_id', 'strain', 'voltage', 'time', 'latitude',
#                    'sst', 'longitude']

e.constraints = {'trajectory_id=': argos_id}

df_years = {}
for year in drifter_years:
    e.dataset_id = year + '_Argos_Drifters_NRT'
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    )
    # Keep only the first whitespace-separated token of every header, so a
    # header shaped like 'time (UTC)' is reduced to its bare variable name.
    df.columns = [column.split()[0] for column in df.columns]
    df_years[year] = df

df = pd.concat(df_years.values())
# get rid of timezone info
df = df.tz_localize(None)

# Earlier CSV-based loading, kept for reference:
# names = ['trajectory_id', 'strain', 'voltage', 'datetime', 'latitude', 'sst',
#          'longitude']
# df = pd.read_csv(filename, skiprows=1, header=0, names=names, parse_dates=[3])
# # df['longitude'] = df.longitude - 360
# df['datetime'] = df.datetime.dt.tz_localize(None)  # to remove timezone info
# df.set_index(['datetime'], inplace=True)
'time>=': str(tini), 'time<=': str(tend), 'latitude>=': lat_lim[0], 'latitude<=': lat_lim[1], 'longitude>=': lon_lim[0], 'longitude<=': lon_lim[1], } variables = [ 'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity' ] e = ERDDAP(server=url_glider, protocol='tabledap', response='nc') #%% RU33 e.dataset_id = 'ru33-20200715T1558' e.constraints = constraints e.variables = variables # checking data frame is not empty df = e.to_pandas() if len(df.index) != 0: # Converting glider data to data frame df = e.to_pandas( index_col='time (UTC)', parse_dates=True, skiprows=(1, ) # units information can be dropped. ).dropna() # Coverting glider vectors into arrays
hc = np.asarray(doppio.variables['hc']) igrid = 1 #%% Reading bathymetry data ncbath = xr.open_dataset(bath_file) bath_lat = ncbath.variables['lat'][:] bath_lon = ncbath.variables['lon'][:] bath_elev = ncbath.variables['elevation'][:] #%% Looping through all gliders found for id in gliders: print('Reading ' + id ) e.dataset_id = id e.constraints = constraints e.variables = variables # chacking data frame is not empty df = e.to_pandas() if len(df.index) != 0 : # Converting glider data to data frame df = e.to_pandas( index_col='time (UTC)', parse_dates=True, skiprows=(1,) # units information can be dropped. ).dropna() # Coverting glider vectors into arrays
def _load_all_stations(es, mcf_template):
    """Return ``{datasetID: MCF skeleton}`` for every public dataset on the server."""
    es.dataset_id = 'allDatasets'

    # filter out "log in" datasets as the vast majority of their metadata is
    # unavailable
    es.constraints = {'accessible=': 'public'}
    stations_df = es.to_pandas()

    # drop 'allDatasets' row
    stations_df.drop(labels=0, axis='index', inplace=True)
    print(stations_df)

    stations = {}
    for _, row in stations_df.iterrows():
        dataset_id = row['datasetID']
        is_table = row['dataStructure'] == 'table'
        dataset_url = row['tabledap'] if is_table else row['griddap']

        # ensure each station has an independent copy of the MCF skeleton
        station = copy.deepcopy(mcf_template)

        station['metadata']['identifier'] = dataset_id
        station['metadata']['dataseturi'] = dataset_url

        station['spatial']['datatype'] = 'textTable' if is_table else 'grid'
        station['spatial']['geomtype'] = row['cdm_data_type']
        # bbox order: min longitude, min latitude, max longitude, max latitude
        station['spatial']['bbox'] = '%s,%s,%s,%s' % (
            row['minLongitude (degrees_east)'],
            row['minLatitude (degrees_north)'],
            row['maxLongitude (degrees_east)'],
            row['maxLatitude (degrees_north)'])

        identification = station['identification']
        identification['title'] = row['title']
        identification['dates']['creation'] = row['minTime (UTC)']
        identification['temporal_begin'] = row['minTime (UTC)']
        identification['temporal_end'] = row['maxTime (UTC)']
        identification['url'] = dataset_url
        identification['abstract'] = row['summary']

        station['distribution']['erddap']['url'] = dataset_url
        station['distribution']['erddap']['name'] = row['title']

        stations[dataset_id] = station

    print('Stations after ERDDAP call...')
    print(stations)
    return stations


def _load_station_metadata(es, station_id, station_data):
    """Add per-variable metadata and keywords for one dataset to ``station_data``."""
    print('Loading ERDDAP metadata for station: %s' % (station_id))
    es.dataset_id = station_id
    metadata_url = es.get_download_url(dataset_id='%s/index' % (station_id),
                                       response='csv',
                                       protocol='info')
    metadata = pd.read_csv(filepath_or_buffer=metadata_url)
    print(metadata_url)
    print(metadata.head())

    # ERDDAP ISO XML provides a list of dataset field names (long & short),
    # data types & units of measurement, in case this becomes useful for the
    # CIOOS metadata standard we can extend the YAML skeleton to include these
    # and the template to export them.
    #
    # below most variable attributes from ERDDAP are extracted and pivoted to
    # describe the field; actual field data types are extracted separately and
    # merged into the pivoted dataframe for completeness
    is_global = metadata['Variable Name'] == 'NC_GLOBAL'
    is_variable_row = metadata['Row Type'] == 'variable'

    columns_pivot = metadata[~is_global & ~is_variable_row].pivot(
        index='Variable Name', columns='Attribute Name', values='Value')
    col_data_types = metadata.loc[is_variable_row,
                                  ['Variable Name', 'Data Type']]
    df_merge = pd.merge(columns_pivot, col_data_types, on='Variable Name')

    station_data['dataset'] = {
        field['Variable Name']: {
            'long_name': field['long_name'],
            'data_type': field['Data Type'],
            'units': field['units'],
        }
        for _, field in df_merge.iterrows()
    }

    is_keywords = metadata['Attribute Name'] == 'keywords'
    station_data['identification']['keywords']['default'][
        'keywords'] = metadata[is_global & is_keywords]['Value'].values

    return station_data


def load_data_from_erddap(config, station_id=None, station_data=None):
    """Load dataset metadata from an ERDDAP server into MCF skeleton dicts.

    The function has two modes, selected by ``station_id``:

    * ``station_id is None``: query the server's ``allDatasets`` table
      (public datasets only) and return a dict mapping every ``datasetID`` to
      its own deep copy of the MCF template, filled in with identifier, URL,
      bounding box, title, time range and summary.
    * ``station_id`` given: download that dataset's ``info`` table and store
      the per-variable ``long_name`` / ``data_type`` / ``units`` under
      ``station_data['dataset']`` and the global keywords under
      ``station_data['identification']['keywords']['default']['keywords']``.
      ``station_data`` is modified in place and returned.

    Args:
        config: Mapping that provides ``config['static_data']['mcf_template']``
            (path of the YAML template) and
            ``config['dynamic_data']['erddap_server']`` /
            ``config['dynamic_data']['erddap_protocol']``.
        station_id: ERDDAP dataset id, or ``None`` to load every dataset.
        station_data: Existing skeleton dict for ``station_id``; required
            whenever ``station_id`` is given.

    Returns:
        The dict of all station skeletons, or the updated ``station_data``.

    Raises:
        TypeError: If ``station_id`` is given without ``station_data``.
    """
    # Fail before any file or network access; the original only failed with an
    # opaque "'NoneType' object does not support item assignment" after the
    # metadata had already been downloaded.
    if station_id is not None and station_data is None:
        raise TypeError(
            'station_data must be provided when station_id is given')

    # Context manager so the template file handle is always closed.
    with open(config['static_data']['mcf_template'], 'r') as template_file:
        mcf_template = yaml.load(template_file, Loader=yaml.FullLoader)

    es = ERDDAP(
        server=config['dynamic_data']['erddap_server'],
        protocol=config['dynamic_data']['erddap_protocol'],
    )

    if station_id is None:
        # load all station data MCF skeleton
        return _load_all_stations(es, mcf_template)

    # load specific station data into MCF skeleton
    return _load_station_metadata(es, station_id, station_data)