def load_data(self, year='2019'):
    """Download every dataset of this glider for ``year`` into ``self.dfs``.

    Walks the rows of ``self.df`` and, for each row whose ``'Dataset ID'``
    contains both ``self.glider_id`` and ``year``, requests that dataset from
    ``self.server_url`` (tabledap) with ``self.constraints`` and the
    per-dataset variable list ``self.variables[dataset_id]``.

    :param year: substring that must appear in the dataset ID
    :return: ``self.dfs``, a dict mapping dataset ID to a DataFrame indexed
        on ``'time (UTC)'``; datasets whose request raises ``HTTPError`` are
        reported and left out
    """
    self.dfs = {}
    for _, row in self.df.iterrows():
        dataset_id = row['Dataset ID']
        if self.glider_id not in dataset_id or year not in dataset_id:
            continue
        print(dataset_id)
        try:
            e = ERDDAP(
                server=self.server_url,
                protocol='tabledap',
                response='csv',
            )
            e.dataset_id = dataset_id
            e.constraints = self.constraints
            e.variables = self.variables[dataset_id]
            # The download is the step that talks to the server, so it has
            # to sit inside the ``try`` for the HTTPError handler to be of
            # any use.
            frame = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,),  # units information can be dropped.
            )
        except HTTPError:
            print('Failed to generate url {}'.format(dataset_id))
            continue
        self.dfs[dataset_id] = frame
    return self.dfs
def get_erddap_dataset(server, protocol, file_type, ds_id, var_list=None):
    """Fetch an ERDDAP dataset as an xarray dataset sorted by time.

    :param server: ERDDAP server passed to ``ERDDAP(server=...)``
    :param protocol: ERDDAP protocol passed to ``ERDDAP(protocol=...)``
    :param file_type: response type passed to ``ERDDAP(response=...)``
    :param ds_id: dataset ID to request
    :param var_list: optional list of variables; when empty or None the
        request's variables are left untouched
    :return: the dataset returned by ``to_xarray()``, sorted by its ``time``
    """
    request = ERDDAP(server=server, protocol=protocol, response=file_type)
    request.dataset_id = ds_id
    if var_list:
        request.variables = var_list

    dataset = request.to_xarray()
    return dataset.sortby(dataset.time)
def _index_on_unique_time(ds):
    """Re-index *ds* on sorted ``time`` and drop repeated time values."""
    # want to have the dimension be time not obs number
    ds = ds.swap_dims({"obs": "time"})
    ds = ds.sortby("time")
    return ds.sel(time=~ds.indexes['time'].duplicated())


def load_glider(dataset_id='ru32-20190102T1317-profile-sci-rt',
                server="http://slocum-data.marine.rutgers.edu/erddap",
                raw_server="http://slocum-data.marine.rutgers.edu/erddap"):
    '''
    Load glider data from erddap.

    The science dataset ``dataset_id`` is read from ``server``. The matching
    raw trajectory dataset (``dataset_id`` with ``'profile-sci'`` replaced by
    ``'trajectory-raw'``) is read from ``raw_server`` to obtain
    ``m_water_depth``, which is attached to the result as ``bottom_depth``.

    :param dataset_id: ID of the science dataset
    :param server: ERDDAP server holding the science dataset
    :param raw_server: ERDDAP server holding the raw trajectory dataset.
        The default is the URL that used to be hard-coded, so existing
        callers see no change.
    :return: an xarray dataset indexed on time, with a ``bottom_depth``
        variable interpolated (nearest) onto the science time stamps
    '''
    # should change: write to_netcdf, then check if netcdf exists
    e = ERDDAP(
        server=server,
        protocol="tabledap",
        response="nc",
    )
    e.dataset_id = dataset_id
    gds = _index_on_unique_time(e.to_xarray())

    # get the seafloor depths too
    e2 = ERDDAP(
        server=raw_server,
        protocol="tabledap",
        response="nc",
    )
    # get some of the raw data:
    e2.dataset_id = dataset_id.replace('profile-sci', 'trajectory-raw')
    e2.variables = ['time', 'm_water_depth', 'm_pitch']

    # this connects to the data and loads it into an xarray dataset
    gds_raw = _index_on_unique_time(e2.to_xarray().drop_dims('trajectory'))

    # remove bad values: only depths above 10 are kept
    gds_raw['m_water_depth'] = gds_raw.m_water_depth.where(
        gds_raw.m_water_depth > 10, drop=True)

    gds['bottom_depth'] = gds_raw.m_water_depth.interp_like(gds,
                                                            method='nearest')
    return gds
def active_argo_floats(bbox=None, time_start=None, time_end=None, floats=None):
    """
    Query the 'ArgoFloats' dataset of the 'IFREMER' ERDDAP server.

    :param bbox: [lon_min, lon_max, lat_min, lat_max]; a falsy value selects
        [-100, -45, 5, 46]
    :param time_start: time to start looking for floats; a falsy value
        selects one day before ``time_end``
    :param time_end: time to end looking for floats; a falsy value selects
        today's date
    :param floats: optional value for the ``platform_number=`` constraint
    :return: DataFrame with rows containing NaN dropped, or an empty
        DataFrame when the request raises ``HTTPError``
    """
    box = bbox or [-100, -45, 5, 46]
    if not time_end:
        time_end = dt.date.today()
    if not time_start:
        time_start = time_end - dt.timedelta(days=1)

    query = {
        'time>=': str(time_start),
        'time<=': str(time_end),
        'longitude>=': box[0],
        'longitude<=': box[1],
        'latitude>=': box[2],
        'latitude<=': box[3],
    }
    if floats:
        query['platform_number='] = floats

    request = ERDDAP(server='IFREMER', protocol='tabledap', response='nc')
    request.dataset_id = 'ArgoFloats'
    request.constraints = query
    request.variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    try:
        frame = request.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,),  # units information can be dropped.
        )
        return frame.dropna()
    except HTTPError:
        return pd.DataFrame()
def get_erddap_dataset(server, ds_id, variables=None, constraints=None):
    """Return a tabledap dataset from *server* as a time-sorted xarray dataset.

    :param server: ERDDAP server passed to ``ERDDAP(server=...)``
    :param ds_id: dataset ID to request
    :param variables: optional list of variables; ignored when empty
    :param constraints: optional constraints; ignored when empty
    :return: result of ``to_xarray()`` sorted by its ``time`` variable
    """
    request = ERDDAP(server=server, protocol='tabledap', response='nc')
    request.dataset_id = ds_id

    # Only override the request attributes that were actually supplied.
    optional = (('constraints', constraints), ('variables', variables))
    for attribute, value in optional:
        if value:
            setattr(request, attribute, value)

    dataset = request.to_xarray()
    return dataset.sortby(dataset.time)
def check_dataset_empty(url_erddap,dataset_id,date_ini,date_end,lon_lim,lat_lim):
    """Report whether a glider dataset has fewer than 4 rows in a window.

    Requests depth, latitude, longitude, time, temperature and salinity from
    ``dataset_id`` on ``url_erddap``, restricted to the given time, latitude
    and longitude limits.

    :param date_ini: lower bound for the ``time>=`` constraint
    :param date_end: upper bound for the ``time<=`` constraint
    :param lon_lim: [min, max] longitude
    :param lat_lim: [min, max] latitude
    :return: True when the returned data frame has fewer than 4 rows
    """
    from erddapy import ERDDAP

    request = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    request.dataset_id = dataset_id
    request.constraints = {
        'time>=': date_ini,
        'time<=': date_end,
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }
    request.variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    # Convert the glider data to a data frame and check its size
    n_rows = len(request.to_pandas())
    return n_rows < 4
def get_erddap_dataset(ds_id, variables=None, constraints=None, filetype=None):
    """
    Returns a netcdf dataset for a specified dataset ID (or dataframe if
    dataset cannot be converted to xarray)

    :param ds_id: dataset ID e.g. ng314-20200806T2040
    :param variables: optional list of variables
    :param constraints: optional list of constraints
    :param filetype: optional filetype to return, 'nc' (default) or
        'dataframe'
    :return: for 'nc', an xarray dataset sorted by time; an empty list when
        the request raises OSError; a DataFrame (NaN rows dropped) when the
        conversion raises TypeError. For 'dataframe', a DataFrame with NaN
        rows dropped.
    :raises ValueError: if ``filetype`` is neither 'nc' nor 'dataframe'
    """
    filetype = filetype or 'nc'
    # Validate up front: the old code printed a message and then failed on
    # ``return ds`` with an UnboundLocalError.
    if filetype not in ('nc', 'dataframe'):
        raise ValueError(
            'Unrecognized filetype: {}. Needs to be "nc" or "dataframe"'.
            format(filetype))

    e = ERDDAP(server='NGDAC', protocol='tabledap', response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables

    if filetype == 'dataframe':
        return e.to_pandas().dropna()

    try:
        ds = e.to_xarray()
        ds = ds.sortby(ds.time)
    except OSError:
        print('No dataset available for specified constraints: {}'.format(
            ds_id))
        ds = []
    except TypeError:
        print('Cannot convert to xarray, providing dataframe: {}'.format(
            ds_id))
        ds = e.to_pandas().dropna()
    return ds
def get_erddap_data(dataset_id):
    '''
    Download one glider deployment from https://gliders.ioos.us/erddap.

    :param dataset_id: the deployment name example:'ce_311-20200708T1723'
    :return: pandas DataFrame with the requested deployment variables
    '''
    wanted = [
        'depth',
        'latitude',
        'longitude',
        'salinity',
        'temperature',
        'conductivity',
        'density',
        'time',
    ]

    request = ERDDAP(
        server='https://gliders.ioos.us/erddap',
        protocol='tabledap',
    )
    request.response = 'csv'
    request.dataset_id = dataset_id
    request.variables = wanted
    return request.to_pandas()
def load_data(self,year='2019'):
    """Fetch this glider's datasets for ``year`` and store them in ``self.dfs``.

    A row of ``self.df`` is selected when its ``'Dataset ID'`` contains both
    ``self.glider_id`` and ``year``. Each selected dataset is requested from
    ``self.server_url`` using ``self.constraints`` and
    ``self.variables[dataset_id]``.

    :param year: substring that must appear in the dataset ID
    :return: ``self.dfs``, mapping dataset ID to a DataFrame indexed on
        ``'time (UTC)'``; a dataset whose request raises ``HTTPError`` is
        reported and skipped
    """
    self.dfs = {}
    for _, row in self.df.iterrows():
        dataset_id = row['Dataset ID']
        wanted = (self.glider_id in dataset_id) and (year in dataset_id)
        if not wanted:
            continue
        print(dataset_id)
        try:
            e = ERDDAP(server=self.server_url,
                       protocol='tabledap',
                       response='csv',
                       )
            e.dataset_id = dataset_id
            e.constraints = self.constraints
            e.variables = self.variables[dataset_id]
            # Keep the download inside the ``try``: previously it ran after
            # the handler, so a failing request escaped it.
            self.dfs[dataset_id] = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,),  # units information can be dropped.
            )
        except HTTPError:
            print('Failed to generate url {}'.format(dataset_id))
            continue
    return self.dfs
def read_glider_data_erddap_server(url_erddap, dataset_id,
                                   lat_lim, lon_lim, scatter_plot, **kwargs):
    """
    Created on Tue Feb 5 10:05:37 2019

    @author: aristizabal

    This function reads glider data from the IOOS Data Assembly Center (DAC).

    Inputs:
    url_erddap: url address of the ERDDAP server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: this id is retrieved from the glider DAC using the
                function "retrieve_glider_id_erddap_server".
                Example: 'ru30-20180705T1825'
    lat_lim: latitude limits for the search.
             Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
             Example, lon_lim = [-75.0,-72.0]
    date_ini: (keyword) initial date of time window.
              Example: date_ini = '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end: (keyword) final date of time window.
              Example: date_end = '2018-08-10T00:00:00Z' or '2018/08/10/00'
              The time constraints are applied only when both date_ini and
              date_end are given.
    scatter_plot: if equal to 'yes' then scatter plots of the glider
                  transect (temperature and salinity) are produced

    Outputs (all np.nan when the server returns 3 rows or fewer):
    tempg: (max profile length, number of unique times) matrix of
           temperature, NaN padded
    saltg: matrix of salinity, same shape as tempg
    timeg: unique time stamps, one per column of the matrices
    latg: latitude at the first sample of every unique time
    long: longitude at the first sample of every unique time
    depthg: matrix of depth, same shape as tempg
    """
    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints['latitude>='] = lat_lim[0]
    constraints['latitude<='] = lat_lim[1]
    constraints['longitude>='] = lon_lim[0]
    constraints['longitude<='] = lon_lim[1]

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Converting glider data to data frame. The data used to be downloaded
    # twice (once only to count rows); a single request is enough.
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    )

    # Checking that data frame has data. One row is skipped above, so
    # "more than 3 rows in the full table" becomes "more than 2 rows" here.
    if len(df) <= 2:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    df = df.dropna()

    # Converting glider vectors into arrays
    timeg, ind = np.unique(df.index.values, return_index=True)
    latg = df['latitude (degrees_north)'].values[ind]
    long = df['longitude (degrees_east)'].values[ind]
    dg = df['depth (m)'].values
    # Positional lookup: with the variable order requested above these are
    # presumably temperature and salinity -- confirm if `variables` changes.
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    # Profile i covers samples bounds[i]:bounds[i + 1]
    bounds = np.hstack([ind, len(dg)])
    # np.int was removed from NumPy; the builtin int does the same job
    zn = int(np.max(np.diff(bounds)))

    depthg = np.full((zn, len(timeg)), np.nan)
    tempg = np.full((zn, len(timeg)), np.nan)
    saltg = np.full((zn, len(timeg)), np.nan)

    for i in range(len(ind)):
        profile = slice(bounds[i], bounds[i + 1])
        depth_i = dg[profile]
        n_samples = len(depth_i)
        depthg[0:n_samples, i] = depth_i
        tempg[0:n_samples, i] = vg1[profile]
        saltg[0:n_samples, i] = vg2[profile]

    def _scatter(varg, color_map, label):
        # One transect figure: every sample coloured by `varg`
        timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
        ttg = np.ravel(timeg_matrix)
        depths = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -depths, cmap=color_map, **kw)
        ax.set_xlim(timeg[0], timeg[-1])
        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel(label, fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(depths), 0])

    # Scatter plot
    if scatter_plot == 'yes':
        _scatter(tempg, cmocean.cm.thermal, 'Temperature ($^oC$)')
        _scatter(saltg, cmocean.cm.haline, 'Salinity')

    return tempg, saltg, timeg, latg, long, depthg
def active_gliders(bbox=None, time_start=None, time_end=None, glider_id=None):
    """Collect recent glider data from the 'NGDAC' ERDDAP server.

    Searches the server for datasets inside the bounding box and time window
    and downloads depth, latitude, longitude, time, temperature and salinity
    from each of them.

    :param bbox: [lon_min, lon_max, lat_min, lat_max]; a falsy value selects
        [-100, -40, 18, 60]
    :param time_start: start of the window; defaults to one day before
        ``time_end``
    :param time_end: end of the window; defaults to today's date, evaluated
        at call time (the previous ``dt.date.today()`` default was frozen
        when the function was defined)
    :param glider_id: optional search term for the dataset search
    :return: DataFrame indexed by ('dataset_id', 'time (UTC)'); an empty
        DataFrame when the search fails or no dataset yields data
    """
    bbox = bbox or [-100, -40, 18, 60]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')

    e = ERDDAP(server='NGDAC')

    # Search constraints
    kw = {
        'min_time': t0,
        'max_time': t1,
        'min_lon': bbox[0],
        'max_lon': bbox[1],
        'min_lat': bbox[2],
        'max_lat': bbox[3],
    }

    search_url = e.get_search_url(search_for=glider_id or None,
                                  response='csv', **kw)

    try:
        # Grab the results
        search = pd.read_csv(search_url)
    except Exception:
        # return empty dataframe if there are no results
        return pd.DataFrame()

    # Extract the IDs
    gliders = search['Dataset ID'].values

    msg = 'Found {} Glider Datasets:\n\n{}'.format
    print(msg(len(gliders), '\n'.join(gliders)))

    # Setting constraints
    constraints = {
        'time>=': t0,
        'time<=': t1,
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    variables = [
        'depth',
        'latitude',
        'longitude',
        'time',
        'temperature',
        'salinity',
    ]

    e = ERDDAP(
        server='NGDAC',
        protocol='tabledap',
        response='nc'
    )

    glider_dfs = []

    for dataset_id in gliders:
        e.dataset_id = dataset_id
        e.constraints = constraints
        e.variables = variables

        # Best effort: a dataset that cannot be downloaded or parsed is
        # skipped. ``Exception`` (not a bare except) keeps Ctrl-C working.
        try:
            df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            ).dropna()
        except Exception:
            continue
        df = df.reset_index()
        df['dataset_id'] = dataset_id
        df = df.set_index(['dataset_id', 'time (UTC)'])
        glider_dfs.append(df)

    try:
        ndf = pd.concat(glider_dfs)
    except ValueError:
        # nothing was downloaded
        return pd.DataFrame()

    return ndf
def read_glider_variables_erddap_server(url_erddap, dataset_id,
                                        lat_lim, lon_lim,
                                        variable_names=None, **kwargs):
    """
    Created on Tue Nov 3 11:26:05 2020

    @author: aristizabal

    This function reads glider variables from the IOOS and Rutgers erddap
    glider servers.

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'
    variable_names: list of variable names.
                Example: variable_names = ['depth', 'latitude', 'longitude',
                'time', 'temperature', 'salinity']
                The default value is variable_names=['time']
    lat_lim: latitude limits for the search.
             Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
             Example, lon_lim = [-75.0,-72.0]
    date_ini: (keyword) initial date of time window.
              Example: date_ini = '2018-08-02T00:00:00Z' or '2018/08/02/00'
    date_end: (keyword) final date of time window.
              Example: date_end = '2018-08-10T00:00:00Z' or '2018/08/10/00'
              The time constraints are applied only when both date_ini and
              date_end are given.

    Outputs:
    df: Pandas data frame with all the variables requested as vectors
    """
    from erddapy import ERDDAP

    # A list literal as default argument would be shared between calls
    if variable_names is None:
        variable_names = ['time']

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints['latitude>='] = lat_lim[0]
    constraints['latitude<='] = lat_lim[1]
    constraints['longitude>='] = lon_lim[0]
    constraints['longitude<='] = lon_lim[1]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variable_names

    # Converting glider data to data frame.
    # The previous version downloaded the table once to count its rows and
    # then again with parse_dates=True; no index column is selected, so one
    # request with parse_dates=True is used for every case.
    df = e.to_pandas(parse_dates=True)

    return df
### specifying the database name we need the data from. e.dataset_id = 'wocecpr' ### specifying the data constraints e.constraints = { 'time>=': '2000-01-15T01:24:00Z', 'time<=': '2010-01-17T13:39:00Z', 'latitude>=': 37.0, 'latitude<=': 43.43, 'longitude>=': 317.56, 'longitude<=': 322.87, } ###specifying the variables(columns name) to be retrived. e.variables = [ 'sample', 'latitude', 'longitude', 'life_stage', 'abundance', 'time', ] ### searching for the server link and doing the handshaking process. search_url = e.get_search_url(response='csv') ### receiving requested data and saving it into a dataframe search = pd.read_csv(search_url) df = e.to_pandas() ### receiving current working directory and saving the dataframe into a single csv file in that path. wd = os.getcwd() df.to_csv(wd + '/DataFiles/plankton_swocecpr.csv') #%% """ #########################################################################################################################
def get_ndbc(bbox=None, time_start=None, time_end=None, buoy=None):
    """Query station positions from the 'cwwcNDBCMet' dataset on 'CSWC'.

    :param bbox: [lon_min, lon_max, lat_min, lat_max]; a falsy value selects
        [-100, -45, 5, 46]
    :param time_start: start of the window; defaults to one day before
        ``time_end``
    :param time_end: end of the window; defaults to today's date
    :param buoy: optional value for the ``station=`` constraint
    :return: DataFrame with station, latitude, longitude and time (rows
        containing NaN dropped), or an empty DataFrame when the request
        raises ``HTTPError``
    """
    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    time_formatter = '%Y-%m-%dT%H:%M:%SZ'

    e = ERDDAP(
        server='CSWC',
        protocol='tabledap',
        response='csv'
    )

    e.dataset_id = 'cwwcNDBCMet'
    e.constraints = {
        'time>=': time_start.strftime(time_formatter),
        'time<=': time_end.strftime(time_formatter),
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }
    if buoy:
        e.constraints['station='] = buoy

    e.variables = [
        "station",
        "latitude",
        "longitude",
        "time"
    ]

    # The same request used to be issued twice: once unguarded (only to
    # build an unused station list) and once inside the ``try``. A single
    # guarded request returns the same frame and honours the handler.
    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
if fname.is_file(): data = pd.read_csv(fname, parse_dates=["time (UTC)"]) else: e = ERDDAP( server="http://erddap.aoos.org/erddap/", protocol="tabledap" ) e.dataset_id = "kotzebue-alaska-water-level" e.constraints = { "time>=": "2018-09-05T21:00:00Z", "time<=": "2019-07-10T19:00:00Z", } e.variables = [ variable_name, "time", "z", ] data = e.to_pandas( index_col="time (UTC)", parse_dates=True, ) data["timestamp"] = data.index.astype("int64") // 1e9 data.to_csv(fname) data.head() from ioos_qc.config import QcConfig qc = QcConfig(qc_config)
def read_glider_data_erddap_Rutgers_server(url_erddap, dataset_id,
                                           lat_lim, lon_lim, scatter_plot,
                                           **kwargs):
    """
    Read glider data from an ERDDAP server and split it into casts.

    The depth record is cut wherever a run of decreasing or increasing depth
    ends; every piece becomes one column of the output matrices, sorted by
    depth and NaN padded.

    Inputs:
    url_erddap: url address of the ERDDAP server
    dataset_id: dataset to read
    lat_lim: latitude limits for the search, [min, max]
    lon_lim: longitude limits for the search, [min, max]
    scatter_plot: if equal to 'yes' then scatter plots of temperature and
                  salinity are produced
    date_ini, date_end: (keywords) time window; applied only when both are
                  given

    Outputs (all np.nan when no usable rows are returned):
    tempg, saltg, depthg: (rows, number of columns) matrices
    timeg: matrix of matplotlib date numbers, same shape
    latg, long: one value per column, the latitude/longitude of the first
                sample of that column (NaN for an empty column)

    NOTE(review): ``latg`` and ``long`` were returned but never assigned in
    the previous version, so the function could not return. The per-column
    definition above mirrors read_glider_data_erddap_server -- confirm it is
    what callers expect.
    """
    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Find time window of interest
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints['latitude>='] = lat_lim[0]
    constraints['latitude<='] = lat_lim[1]
    constraints['longitude>='] = lon_lim[0]
    constraints['longitude<='] = lon_lim[1]

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Converting glider data to data frame (single request; the table used
    # to be downloaded twice).
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

    # Checking that data frame has data
    if len(df) == 0:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    dg = df['depth (m)'].values
    tg = df.index.values
    lat = df['latitude (degrees_north)'].values
    lon = df['longitude (degrees_east)'].values
    # Positional lookup: with the variable order requested above these are
    # presumably temperature and salinity -- confirm if `variables` changes.
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    # Last index of every upcast / downcast run
    upcast = np.where(np.diff(dg) < 0)[0]
    oku = np.where(np.diff(upcast) > 1)[0]
    end_upcast = upcast[oku]

    downcast = np.where(np.diff(dg) > 0)[0]
    okd = np.where(np.diff(downcast) > 1)[0]
    end_downcast = downcast[okd]

    ind = np.hstack(
        [0, np.unique(np.hstack([end_upcast, end_downcast])), len(dg)])

    # Sample range of every column. The first/else branches were two
    # separate ``if`` statements before, so column 0 was filled and then
    # partly overwritten; ``elif`` keeps the first fill.
    n_col = len(ind)
    segments = []
    for i in range(n_col):
        if i == 0:
            segments.append(slice(ind[i], ind[i + 1] + 2))
        elif i < n_col - 1:
            segments.append(slice(ind[i] + 1, ind[i + 1] + 2))
        else:
            # ind[-1] == len(dg), so the last column is always empty
            segments.append(slice(ind[i] + 1, len(dg)))

    # A segment can be longer than max(diff(ind)) because of the "+ 2"
    # overlap, which made the assignments below fail; size for the longest.
    zn = max(int(np.max(np.diff(ind))),
             max(len(dg[seg]) for seg in segments))

    depthg = np.full((zn, n_col), np.nan)
    timeg = np.full((zn, n_col), np.nan)
    tempg = np.full((zn, n_col), np.nan)
    saltg = np.full((zn, n_col), np.nan)
    latg = np.full(n_col, np.nan)
    long = np.full(n_col, np.nan)

    for i, seg in enumerate(segments):
        indd = np.argsort(dg[seg])
        n_samples = len(indd)
        if n_samples == 0:
            continue
        depthg[0:n_samples, i] = dg[seg][indd]
        timeg[0:n_samples, i] = mdates.date2num(tg[seg][indd])
        tempg[0:n_samples, i] = vg1[seg][indd]
        saltg[0:n_samples, i] = vg2[seg][indd]
        latg[i] = lat[seg][0]
        long[i] = lon[seg][0]

    def _scatter(varg, color_map, label):
        # One transect figure: every sample coloured by `varg`
        ttg = np.ravel(timeg)
        depths = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -depths, cmap=color_map, **kw)
        ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))
        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel(label, fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(depths), 0])

    # Scatter plot
    if scatter_plot == 'yes':
        _scatter(tempg, cmocean.cm.thermal, 'Temperature ($^oC$)')
        _scatter(saltg, cmocean.cm.haline, 'Salinity')

    return tempg, saltg, timeg, latg, long, depthg
protocol='tabledap', ) e.dataset_id = 'IWBNetwork' e.constraints = { 'time>=': '2015-06-28T00:00:00Z', 'station_id=': 'M3' } e.variables = [ 'time', 'AtmosphericPressure', 'WindDirection', 'WindSpeed', 'WaveHeight', 'WavePeriod', 'MeanWaveDirection', # 'Hmax', # 'AirTemperature', 'SeaTemperature' ] url = e.get_download_url() print(url) df = e.to_pandas( index_col='time (UTC)', parse_dates=True).dropna()
def GOFS_RTOFS_vs_Argo_floats(lon_forec_track, lat_forec_track,
                              lon_forec_cone, lat_forec_cone,
                              lon_best_track, lat_best_track,
                              lon_lim, lat_lim, folder_fig):
    """Compare Argo float profiles with GOFS 3.1, RTOFS and Copernicus.

    Finds the Argo floats reported inside ``lon_lim``/``lat_lim`` during the
    current time window, saves a map of their positions and, for every
    float, saves one temperature and one salinity figure with the float
    profile and the model profiles at the nearest grid point at or beyond
    the float position.

    :param lon_forec_track, lat_forec_track: forecast track, drawn on the map
    :param lon_forec_cone, lat_forec_cone: forecast cone, drawn on the map
    :param lon_best_track, lat_best_track: best track, drawn on the map
    :param lon_lim: [min, max] longitude of the search box and map
    :param lat_lim: [min, max] latitude of the search box and map
    :param folder_fig: prefix prepended to every figure file name (it is
        concatenated as a string, so include the trailing separator)
    :return: None; figures are written to disk

    Model, bathymetry and output locations are fixed inside the function.
    The Copernicus credentials can be supplied through the ``CMEMS_USER``
    and ``CMEMS_PWD`` environment variables.
    """
    #%% User input

    # GOFS3.1 output model location
    url_GOFS_ts = 'http://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/ts3z'

    # RTOFS files
    folder_RTOFS = '/home/coolgroup/RTOFS/forecasts/domains/hurricanes/RTOFS_6hourly_North_Atlantic/'

    nc_files_RTOFS = ['rtofs_glo_3dz_f006_6hrly_hvr_US_east.nc',
                      'rtofs_glo_3dz_f012_6hrly_hvr_US_east.nc',
                      'rtofs_glo_3dz_f018_6hrly_hvr_US_east.nc',
                      'rtofs_glo_3dz_f024_6hrly_hvr_US_east.nc']

    # COPERNICUS MARINE ENVIRONMENT MONITORING SERVICE (CMEMS)
    url_cmems = 'http://nrt.cmems-du.eu/motu-web/Motu'
    service_id = 'GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
    product_id = 'global-analysis-forecast-phy-001-024'
    depth_min = '0.493'
    out_dir = '/home/aristizabal/crontab_jobs'

    # Bathymetry file
    bath_file = '/home/aristizabal/bathymetry_files/GEBCO_2014_2D_-100.0_0.0_-10.0_50.0.nc'

    # Argo floats
    url_Argo = 'http://www.ifremer.fr/erddap'

    #%%

    from matplotlib import pyplot as plt
    import numpy as np
    import xarray as xr
    import netCDF4
    from datetime import datetime, timedelta
    import cmocean
    import matplotlib.dates as mdates
    from erddapy import ERDDAP
    import pandas as pd
    import os
    import subprocess

    # SECURITY NOTE(review): the fallback credentials are kept only so that
    # existing cron jobs keep working. They are committed in clear text and
    # should be rotated and removed; prefer the environment variables.
    cmems_user = os.environ.get('CMEMS_USER', 'maristizabalvar')
    cmems_pwd = os.environ.get('CMEMS_PWD', 'MariaCMEMS2018')

    # Do not produce figures on screen
    plt.switch_backend('agg')

    # Increase fontsize of labels globally
    plt.rc('xtick', labelsize=14)
    plt.rc('ytick', labelsize=14)
    plt.rc('legend', fontsize=14)

    #%% Reading bathymetry data

    ncbath = xr.open_dataset(bath_file)
    bath_lat = ncbath.variables['lat'][:]
    bath_lon = ncbath.variables['lon'][:]
    bath_elev = ncbath.variables['elevation'][:]

    oklatbath = np.logical_and(bath_lat >= lat_lim[0], bath_lat <= lat_lim[-1])
    oklonbath = np.logical_and(bath_lon >= lon_lim[0], bath_lon <= lon_lim[-1])

    bath_latsub = bath_lat[oklatbath]
    bath_lonsub = bath_lon[oklonbath]
    bath_elevs = bath_elev[oklatbath, :]
    bath_elevsub = bath_elevs[:, oklonbath]

    #%% Get time bounds for current day
    ti = datetime.today() - timedelta(1) - timedelta(hours=6)
    tini = datetime(ti.year, ti.month, ti.day)
    te = ti + timedelta(2)
    tend = datetime(te.year, te.month, te.day)

    #%% Look for Argo datasets

    e = ERDDAP(server=url_Argo)

    kw = {
        'min_lon': lon_lim[0],
        'max_lon': lon_lim[1],
        'min_lat': lat_lim[0],
        'max_lat': lat_lim[1],
        'min_time': str(tini),
        'max_time': str(tend),
    }

    search_url = e.get_search_url(response='csv', **kw)

    # Grab the results
    search = pd.read_csv(search_url)

    # Extract the IDs
    dataset = search['Dataset ID'].values

    msg = 'Found {} Datasets:\n\n{}'.format
    print(msg(len(dataset), '\n'.join(dataset)))

    # Only the first dataset found is used
    dataset_type = dataset[0]

    constraints = {
        'time>=': str(tini),
        'time<=': str(tend),
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }

    variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    e = ERDDAP(server=url_Argo, protocol='tabledap', response='nc')

    e.dataset_id = dataset_type
    e.constraints = constraints
    e.variables = variables

    print(e.get_download_url())

    df = e.to_pandas(
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

    argo_ids = np.asarray(df['platform_number'])
    argo_times = np.asarray(df['time (UTC)'])
    argo_press = np.asarray(df['pres (decibar)'])
    argo_lons = np.asarray(df['longitude (degrees_east)'])
    argo_lats = np.asarray(df['latitude (degrees_north)'])
    argo_temps = np.asarray(df['temp (degree_Celsius)'])
    argo_salts = np.asarray(df['psal (PSU)'])

    #%% GOFS 3.1

    try:
        GOFS_ts = xr.open_dataset(url_GOFS_ts, decode_times=False)

        lt_GOFS = np.asarray(GOFS_ts['lat'][:])
        ln_GOFS = np.asarray(GOFS_ts['lon'][:])
        tt = GOFS_ts['time']
        t_GOFS = netCDF4.num2date(tt[:], tt.units)
        depth_GOFS = np.asarray(GOFS_ts['depth'][:])
    except Exception as err:
        # Best effort: without GOFS the figures are still produced, with
        # NaN placeholders for the GOFS profile.
        print(err)
        GOFS_ts = np.nan
        lt_GOFS = np.nan
        ln_GOFS = np.nan
        depth_GOFS = np.nan
        t_GOFS = ti

    #%% Map Argo floats

    lev = np.arange(-9000, 9100, 100)
    plt.figure()
    plt.contourf(bath_lonsub, bath_latsub, bath_elevsub, lev,
                 cmap=cmocean.cm.topo)
    plt.plot(lon_forec_track, lat_forec_track, '.-', color='gold')
    plt.plot(lon_forec_cone, lat_forec_cone, '.-b', markersize=1)
    plt.plot(lon_best_track, lat_best_track, 'or', markersize=3)

    argo_idd = np.unique(argo_ids)
    for float_id in argo_idd:
        okind = np.where(argo_ids == float_id)[0]
        plt.plot(np.unique(argo_lons[okind]), np.unique(argo_lats[okind]),
                 's', color='darkorange', markersize=5, markeredgecolor='k')

    plt.title('Argo Floats ' + str(tini)[0:13] + '-' + str(tend)[0:13],
              fontsize=16)
    plt.axis('scaled')
    plt.xlim(lon_lim[0], lon_lim[1])
    plt.ylim(lat_lim[0], lat_lim[1])

    file = folder_fig + 'ARGO_lat_lon'
    plt.savefig(file, bbox_inches='tight', pad_inches=0.1)
    # Figures are only written to disk; close them so they do not pile up
    plt.close()

    def _profile_figure(float_id, argo_var, argo_depth, model_profiles,
                        title, xlabel, file_name):
        # One figure with the float profile and every model profile.
        plt.figure(figsize=(5, 6))
        plt.plot(argo_var, -argo_depth, '.-', linewidth=2,
                 label='ARGO Float id ' + str(float_id))
        for label, values, depth, color in model_profiles:
            plt.plot(values, -depth, '.-', linewidth=2, label=label,
                     color=color)
        plt.ylim([-1000, 0])
        plt.title(title, fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        if xlabel is not None:
            plt.xlabel(xlabel, fontsize=14)
        plt.legend(loc='lower right', fontsize=14)
        plt.savefig(file_name, bbox_inches='tight', pad_inches=0.1)
        plt.close()

    #%% Figure argo float vs GOFS and vs RTOFS

    # GOFS time stamps do not depend on the float; computed on first use
    tstamp_GOFS = None

    for float_id in argo_idd:
        print(float_id)
        okind = np.where(argo_ids == float_id)[0]
        argo_time = np.asarray([
            datetime.strptime(t, '%Y-%m-%dT%H:%M:%SZ')
            for t in argo_times[okind]
        ])

        argo_lon = argo_lons[okind]
        argo_lat = argo_lats[okind]
        argo_pres = argo_press[okind]
        argo_temp = argo_temps[okind]
        argo_salt = argo_salts[okind]

        # GOFS
        print('Retrieving variables from GOFS')
        if isinstance(GOFS_ts, float):
            temp_GOFS = np.nan
            salt_GOFS = np.nan
        else:
            if tstamp_GOFS is None:
                ttGOFS = np.asarray([
                    datetime(t.year, t.month, t.day, t.hour) for t in t_GOFS
                ])
                tstamp_GOFS = [mdates.date2num(t) for t in ttGOFS]
            oktt_GOFS = np.unique(
                np.round(
                    np.interp(mdates.date2num(argo_time[0]), tstamp_GOFS,
                              np.arange(len(tstamp_GOFS)))).astype(int))[0]
            oklat_GOFS = np.where(lt_GOFS >= argo_lat[0])[0][0]
            # GOFS longitudes are compared after adding 360 to the float's
            oklon_GOFS = np.where(ln_GOFS >= argo_lon[0] + 360)[0][0]
            temp_GOFS = np.asarray(GOFS_ts['water_temp'][oktt_GOFS, :,
                                                         oklat_GOFS,
                                                         oklon_GOFS])
            salt_GOFS = np.asarray(GOFS_ts['salinity'][oktt_GOFS, :,
                                                       oklat_GOFS,
                                                       oklon_GOFS])

        # RTOFS
        # Time window: the day of the first float observation
        day_ini = datetime(int(argo_time[0].year), int(argo_time[0].month),
                           int(argo_time[0].day))
        day_end = day_ini + timedelta(days=1)

        # Read RTOFS grid and time
        print('Retrieving coordinates from RTOFS')

        fol = 'rtofs.' + day_ini.strftime('%Y%m%d')

        with xr.open_dataset(folder_RTOFS + fol + '/' +
                             nc_files_RTOFS[0]) as ncRTOFS:
            latRTOFS = np.asarray(ncRTOFS.Latitude[:])
            lonRTOFS = np.asarray(ncRTOFS.Longitude[:])
            depth_RTOFS = np.asarray(ncRTOFS.Depth[:])

        tRTOFS = []
        for nc_name in nc_files_RTOFS:
            with xr.open_dataset(folder_RTOFS + fol + '/' +
                                 nc_name) as ncRTOFS:
                tRTOFS.append(np.asarray(ncRTOFS.MT[:])[0])

        tRTOFS = np.asarray([
            mdates.num2date(mdates.date2num(t)) for t in tRTOFS
        ])

        oktt_RTOFS = np.where(
            mdates.date2num(tRTOFS) >= mdates.date2num(argo_time[0]))[0][0]
        oklat_RTOFS = np.where(latRTOFS[:, 0] >= argo_lat[0])[0][0]
        oklon_RTOFS = np.where(lonRTOFS[0, :] >= argo_lon[0])[0][0]

        nc_file = folder_RTOFS + fol + '/' + nc_files_RTOFS[oktt_RTOFS]
        with xr.open_dataset(nc_file) as ncRTOFS:
            temp_RTOFS = np.asarray(
                ncRTOFS.variables['temperature'][0, :, oklat_RTOFS,
                                                 oklon_RTOFS])
            salt_RTOFS = np.asarray(
                ncRTOFS.variables['salinity'][0, :, oklat_RTOFS,
                                              oklon_RTOFS])

        # Downloading and reading Copernicus output.
        # The arguments are passed as a list (no shell), so nothing has to
        # be quoted and no value can be interpreted by a shell.
        motuc = [
            'python', '-m', 'motuclient',
            '--motu', url_cmems,
            '--service-id', service_id,
            '--product-id', product_id,
            '--longitude-min', str(argo_lon[0] - 2 / 12),
            '--longitude-max', str(argo_lon[0] + 2 / 12),
            '--latitude-min', str(argo_lat[0] - 2 / 12),
            '--latitude-max', str(argo_lat[0] + 2 / 12),
            '--date-min', str(day_ini - timedelta(0.5)),
            '--date-max', str(day_end + timedelta(0.5)),
            '--depth-min', depth_min,
            '--depth-max', str(np.nanmax(argo_pres) + 1000),
            '--variable', 'thetao',
            '--variable', 'so',
            '--out-dir', out_dir,
            '--out-name', str(float_id) + '.nc',
            '--user', cmems_user,
            '--pwd', cmems_pwd,
        ]
        try:
            subprocess.run(motuc)
        except OSError as err:
            # e.g. no `python` executable on PATH; handled like a failed
            # download below
            print(err)

        # Check if file was downloaded
        COP_file = out_dir + '/' + str(float_id) + '.nc'
        if os.path.isfile(COP_file):
            with xr.open_dataset(COP_file) as COP:
                latCOP = np.asarray(COP.latitude[:])
                lonCOP = np.asarray(COP.longitude[:])
                depth_COP = np.asarray(COP.depth[:])
                tCOP = np.asarray(
                    mdates.num2date(mdates.date2num(COP.time[:])))

                oktimeCOP = np.where(
                    mdates.date2num(tCOP) >= mdates.date2num(day_ini))[0][0]
                oklonCOP = np.where(lonCOP >= argo_lon[0])[0][0]
                oklatCOP = np.where(latCOP >= argo_lat[0])[0][0]

                temp_COP = np.asarray(
                    COP.variables['thetao'][oktimeCOP, :, oklatCOP,
                                            oklonCOP])
                salt_COP = np.asarray(
                    COP.variables['so'][oktimeCOP, :, oklatCOP, oklonCOP])
        else:
            # Without the file the old code failed on undefined names; plot
            # NaN placeholders instead, as is done for GOFS.
            depth_COP = np.nan
            temp_COP = np.nan
            salt_COP = np.nan

        location = str(argo_time[0])[0:13] + '\n [lon,lat] = [' \
            + str(np.round(argo_lon[0], 3)) + ',' \
            + str(np.round(argo_lat[0], 3)) + ']'

        # Figure temp
        _profile_figure(
            float_id, argo_temp, argo_pres,
            [('GOFS 3.1', temp_GOFS, depth_GOFS, 'red'),
             ('RTOFS', temp_RTOFS, depth_RTOFS, 'g'),
             ('Copernicus', temp_COP, depth_COP, 'darkorchid')],
            'Temperature Profile on ' + location,
            '$^oC$',
            folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_temp_' + str(float_id))

        # Figure salt
        _profile_figure(
            float_id, argo_salt, argo_pres,
            [('GOFS 3.1', salt_GOFS, depth_GOFS, 'red'),
             ('RTOFS', salt_RTOFS, depth_RTOFS, 'g'),
             ('Copernicus', salt_COP, depth_COP, 'darkorchid')],
            'Salinity Profile on ' + location,
            None,
            folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_salt_' + str(float_id))
- Met: Total number of met messages released to the GTS - Wave: Total number of wave messages released to the GTS In this notebook we will explore the statistics of the messages IOOS is releasing to the GTS. The first step is to download the data. We will use an ERDDAP server that [hosts the CSV files](https://ferret.pmel.noaa.gov/generic/erddap/files/ioos_obs_counts/) with the ingest data. from datetime import date from erddapy import ERDDAP server = "http://osmc.noaa.gov/erddap" e = ERDDAP(server=server, protocol="tabledap") e.dataset_id = "ioos_obs_counts" e.variables = ["time", "locationID", "region", "sponsor", "met", "wave"] e.constraints = { "time>=": "2019-09", "time<": "2020-11", } df = e.to_pandas(parse_dates=True) df["locationID"] = df["locationID"].str.lower() df.tail() The table has all the ingest data selected by the time constraints above, from 2019-09 up to (but not including) 2020-11. We can now explore it by grouping the data by IOOS Regional Association (RA). groups = df.groupby("region")
# Fetch the dataset's metadata table (requested as CSV) and show it.
info_url = erd.get_info_url(response='csv')
info_df = to_df(info_url)
info_df

# Keep only the rows that describe variables.
is_variable_row = info_df['Row Type'] == 'variable'
info_df[is_variable_row]

# Take a look at the variables with standard names:


def _has_standard_name(value):
    """Return True for any variable whose standard_name attribute is set."""
    return value is not None


variables = erd.get_var_by_attr(standard_name=_has_standard_name)
variables

# These are the standard variables for the CTDBP instrument - specifically
# for the CP01CNSM-NSIF-CTDBP. Next, let's query the server for _all_
# available data from the CP01CNSM-NSIF-CTDBP.

erd.variables = variables
erd.get_download_url()

# Put it all into a dataframe:

data = erd.to_pandas()

# +
# Plot a basic time-series of the conductivity
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
# -
protocol='tabledap', ) e.response = 'csv' e.dataset_id = 'OSMC_30day' e.constraints = { 'time>=': str(x.tm_year)+"-"+str(x.tm_mon).zfill(2)+"-"+str(x.tm_mday).zfill(2)+"T"+str(x.tm_hour).zfill(2)+":00:00Z", 'longitude>=': -80.0, 'longitude<=': 80.0, 'platform_type=': "DRIFTING BUOYS (GENERIC)", 'platform_code=': str(wmo_mb[i]), } e.variables = [ 'platform_code', 'time', 'latitude', 'longitude', 'sst', 'slp', ] try: df = e.to_pandas() except: print("Não há dados para o WMO "+str(wmo_mb[i])) try: df.columns = ['id', 'tempo','lat','lon','sst','pres'] df.id=sat[i] dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%dT%H:%M:%SZ')
igrid = 1

#%% Reading bathymetry data
# Latitude, longitude and elevation are taken from the bathymetry file.
ncbath = xr.open_dataset(bath_file)
bath_lat = ncbath.variables['lat'][:]
bath_lon = ncbath.variables['lon'][:]
bath_elev = ncbath.variables['elevation'][:]

#%% Looping through all gliders found
for id in gliders:
    print('Reading ' + id)
    e.dataset_id = id
    e.constraints = constraints
    e.variables = variables

    # Download the glider table a single time, already indexed by time,
    # instead of requesting it once to test for emptiness and then again
    # to build the working frame.
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1,)  # units information can be dropped.
    )

    # checking data frame is not empty
    if len(df.index) != 0:
        # Rows with any missing value are discarded before further use.
        df = df.dropna()

        # Converting glider vectors into arrays.
        # np.unique returns the sorted unique time stamps together with the
        # position of the first occurrence of each, which is then used to
        # pick the matching latitude samples.
        timeg, ind = np.unique(df.index.values, return_index=True)
        latg = df['latitude (degrees_north)'].values[ind]
if args.erddap:
    # args.erddap holds the Argos trajectory id first, followed by one or
    # more years; one dataset per year is queried below.
    drifter_years = args.erddap[1:]
    argos_id = args.erddap[0]

    e = ERDDAP(
        server='http://akutan.pmel.noaa.gov:8080/erddap',
        protocol='tabledap',
    )
    e.response = 'csv'
    #e.dataset_id = drifter_year + '_Argos_Drifters_NRT'

    #use this until we can get location quality back into older years
    #currently it is only in erddap for 2020 and newer
    #if int(drifter_years[0]) >= 2020:
    e.variables = [
        'trajectory_id', 'strain', 'voltage', 'time', 'latitude', 'sst',
        'longitude', 'location_quality'
    ]
    #else:
    #    e.variables = ['trajectory_id','strain', 'voltage', 'time', 'latitude', 'sst',
    #    'longitude']

    # Restrict every request to the selected drifter.
    e.constraints = {'trajectory_id=': argos_id}

    df_years = {}
    for year in drifter_years:
        # Dataset ids follow the pattern '<year>_Argos_Drifters_NRT'.
        e.dataset_id = year + '_Argos_Drifters_NRT'
        df = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        )
        # Keep only the first whitespace-separated word of each header,
        # i.e. 'name (units)' becomes 'name'.
        df.columns = [column.split()[0] for column in df.columns]