import random

import pandas as pd
from erddapy import ERDDAP


def get_coordinates(df, **kw):
    '''Query ERDDAP for the distinct lat/lon coordinates of up to 10 datasets.

    df = pd.DataFrame(columns=['server','Dataset ID',...])
    kw = {
        'min_lon': -123.628173,
        'max_lon': -122.02382599999999,
        'min_lat': 47.25972200000001,
        'max_lat': 48.32253399999999,
        'min_time': '2018-01-27T00:00:00Z',
        'max_time': '2019-12-31T00:00:00Z',
    }

    Example of the download URL generated per dataset:
    dataset_url = ('%s/tabledap/%s.csvp?latitude,longitude,time'
                   '&longitude>=-72.0&longitude<=-69&latitude>=38&latitude<=41'
                   '&time>=1278720000.0&time<=1470787200.0&distinct()'
                   % (all_datasets['server'].iloc[int(i)],
                      all_datasets['Dataset ID'].iloc[int(i)]))
    '''
    df_coords = pd.DataFrame()

    # Pick up to 10 random datasets from our search results: iterate the original
    # DataFrame passed in (df) in shuffled order, stopping either at
    # final_dataset_limit (10 currently) or at the last row of df (conclusion of
    # the for loop). An enclosing while loop is unnecessary as a result.
    final_dataset_limit = 10
    datasets_found = 0

    if df.shape[0] < final_dataset_limit:
        final_dataset_limit = df.shape[0]

    index_random = random.sample(range(0, df.shape[0]), df.shape[0])
    print("index_random: {}".format(index_random))

    for i in index_random:
        server_url = df['server'].iloc[int(i)]
        dataset_id = df['Dataset ID'].iloc[int(i)]
        institution = df['Institution'].iloc[int(i)]

        # skip some difficult datasets for now:
        if "ROMS" in dataset_id or "DOP" in dataset_id:
            # skip ROMS model output
            continue

        e = ERDDAP(server=server_url, protocol='tabledap', response='csv')

        try:
            print("datasets_found: {}".format(datasets_found))

            # Set the query via e.constraints rather than passing constraints to
            # e.get_download_url, so we can append '>=' / '<=' to the constraint
            # keys to match ERDDAP's API (the parameter signature differs from the
            # search API used to build df). Also add a 'distinct': () parameter,
            # generate a download URL, and submit a CSV download request to ERDDAP.
            e.constraints = {
                "time>=": kw['min_time'],
                "time<=": kw['max_time'],
                "longitude>=": kw['min_lon'],
                "longitude<=": kw['max_lon'],
                "latitude>=": kw['min_lat'],
                "latitude<=": kw['max_lat'],
                "distinct": ()
            }
            url = e.get_download_url(
                response="csvp",
                dataset_id=dataset_id,
                variables=["latitude", "longitude"],
            )
            print("Download URL: {}".format(url))

            coords = pd.read_csv(url)
            coords['dataset_count'] = i
            coords['dataset_download_url'] = url
            coords['Dataset ID'] = dataset_id
            coords['Institution'] = institution

            # get_var_by_attr example (ToDo):
            # e.get_var_by_attr(dataset_id, standard_name='northward_sea_water_velocity')

            print(coords.head())
            df_coords = pd.concat([df_coords, coords])

            # Reaching this point means the dataset query was successful; increment
            # the count, and break out of the for loop once we reach
            # final_dataset_limit so we don't go over:
            datasets_found += 1
            print("new dataset acquired; datasets_found: {}".format(datasets_found))
            if datasets_found == final_dataset_limit:
                break

        except Exception as ex:
            # This can happen if the dataset has no features within the query
            # window; just log it here:
            if type(ex).__name__ in ["HTTPError"]:
                print(ex)

    return df_coords
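# Example usage (a sketch, not part of the original script): build the search
# results DataFrame with erddapy's advanced search and hand it to
# get_coordinates(). The server URL and bounding box are illustrative
# assumptions; get_coordinates() expects a 'server' column alongside
# 'Dataset ID' and 'Institution', so we add it from the client.
if __name__ == '__main__':
    kw = {
        'min_lon': -123.63, 'max_lon': -122.02,
        'min_lat': 47.26, 'max_lat': 48.32,
        'min_time': '2018-01-27T00:00:00Z', 'max_time': '2019-12-31T00:00:00Z',
    }
    e = ERDDAP(server='https://erddap.sensors.ioos.us/erddap')
    search_url = e.get_search_url(response='csv', **kw)
    all_datasets = pd.read_csv(search_url)
    all_datasets['server'] = e.server
    df_coords = get_coordinates(all_datasets, **kw)
    print(df_coords.head())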
import logging
import multiprocessing
import os
import re
from collections import ChainMap

import numpy as np
import pandas as pd
import xarray as xr
from erddapy import ERDDAP
from joblib import Parallel, delayed

logger_erd = logging.getLogger(__name__)


class ErddapReader:

    def __init__(self, known_server='ioos', protocol=None, server=None,
                 parallel=True, kw=None):
        # kw holds the min/max lon/lat/time bounds used by the region search.
        # `approach`, `variables`, and `_stations` are expected to be set on the
        # instance (e.g. by a region/stations wrapper) before the properties
        # below are used.
        self.kw = kw
        self.parallel = parallel

        # either select a known server or input protocol and server string
        if known_server == 'ioos':
            protocol = 'tabledap'
            server = 'http://erddap.sensors.ioos.us/erddap'
        elif known_server == 'coastwatch':
            protocol = 'griddap'
            server = 'http://coastwatch.pfeg.noaa.gov/erddap'
        elif known_server is not None:
            statement = 'either select a known server or input protocol and server string'
            assert (protocol is not None) & (server is not None), statement
        else:
            # derive a name from the server URL (note: str.strip() with a
            # multi-character argument strips characters, not a substring)
            known_server = server.split('//')[-1].replace('/erddap', '').replace('.', '_')
            statement = 'either select a known server or input protocol and server string'
            assert (protocol is not None) & (server is not None), statement

        self.known_server = known_server
        self.e = ERDDAP(server=server)
        self.e.protocol = protocol
        self.e.server = server

        # columns for metadata
        self.columns = ['geospatial_lat_min', 'geospatial_lat_max',
                        'geospatial_lon_min', 'geospatial_lon_max',
                        'time_coverage_start', 'time_coverage_end',
                        'defaultDataQuery',
                        'subsetVariables',  # first works for timeseries sensors, 2nd for gliders
                        'keywords',  # for hf radar
                        'id', 'infoUrl', 'institution',
                        'featureType', 'source', 'sourceUrl']

        # name
        self.name = f'erddap_{known_server}'

        self.reader = 'ErddapReader'
        # self.data_type = data_type
        # self.standard_names = standard_names
        # DOESN'T CURRENTLY LIMIT WHICH VARIABLES WILL BE FOUND ON EACH SERVER

    @property
    def dataset_ids(self):
        '''Find dataset_ids for server.'''

        if not hasattr(self, '_dataset_ids'):

            # This should be a region search
            if self.approach == 'region':
                # Find all the dataset ids, which we will use to get the data.
                # This limits the search to our keyword arguments in kw, which
                # should have min/max lon/lat/time values.
                dataset_ids = []
                if self.variables is not None:
                    for variable in self.variables:
                        # find and save all dataset_ids associated with variable
                        search_url = self.e.get_search_url(response="csv", **self.kw,
                                                           variableName=variable,
                                                           items_per_page=10000)
                        try:
                            search = pd.read_csv(search_url)
                            dataset_ids.extend(search["Dataset ID"])
                        except Exception as e:
                            logger_erd.exception(e)
                            logger_erd.warning(f"variable {variable} was not found in the search")
                            logger_erd.warning(f'search_url: {search_url}')
                else:
                    # find and save all dataset_ids associated with the region alone
                    search_url = self.e.get_search_url(response="csv", **self.kw,
                                                       items_per_page=10000)
                    try:
                        search = pd.read_csv(search_url)
                        dataset_ids.extend(search["Dataset ID"])
                    except Exception as e:
                        logger_erd.exception(e)
                        logger_erd.warning("nothing found in the search")
                        logger_erd.warning(f'search_url: {search_url}')

                # only need a dataset id once since we will check them each for all standard_names
                self._dataset_ids = list(set(dataset_ids))

            # This should be a search for the station names
            elif self.approach == 'stations':
                # search by station name for each of the stations
                dataset_ids = []
                for station in self._stations:
                    # if station has more than one word, AND will be put between
                    # the words to search for the terms together
                    url = self.e.get_search_url(response="csv", items_per_page=5,
                                                search_for=station)
                    try:
                        df = pd.read_csv(url)
                    except Exception as e:
                        logger_erd.exception(e)
                        logger_erd.warning(f'search url {url} did not work for station {station}.')
                        continue

                    # first try for an exact station match
                    try:
                        dataset_id = [dataset_id for dataset_id in df['Dataset ID']
                                      if station.lower() in dataset_id.lower().split('_')][0]
                    # if that doesn't work, try a more general match and just take
                    # the first returned option
                    except Exception as e:
                        logger_erd.exception(e)
                        logger_erd.warning('When searching for a dataset id to match station name %s, the first attempt to match the id did not work.' % (station))
                        dataset_id = df.iloc[0]['Dataset ID']

                    dataset_ids.append(dataset_id)

                self._dataset_ids = list(set(dataset_ids))

            else:
                logger_erd.warning('Neither stations nor region approach were used in function dataset_ids.')

        return self._dataset_ids

    def meta_by_dataset(self, dataset_id):

        info_url = self.e.get_info_url(response="csv", dataset_id=dataset_id)
        info = pd.read_csv(info_url)

        items = []
        for col in self.columns:
            try:
                item = info[info['Attribute Name'] == col]['Value'].values[0]
                dtype = info[info['Attribute Name'] == col]['Data Type'].values[0]
            except (KeyError, IndexError):
                dtype = None  # avoid using dtype left over from a previous iteration
                if col == 'featureType':
                    # this column is not present in HF Radar metadata, but we want
                    # it to map to data_type, so input 'grid' in that case.
                    item = 'grid'
                else:
                    item = 'NA'

            if dtype == 'String':
                pass
            elif dtype == 'double':
                item = float(item)
            elif dtype == 'int':
                item = int(item)
            items.append(item)

        # if self.standard_names is not None:
        #     # In case the variable is named differently from the standard names,
        #     # we back out the variable names here for each dataset. This also only
        #     # returns those names for which there is data in the dataset.
        #     varnames = self.e.get_var_by_attr(
        #         dataset_id=dataset_id,
        #         standard_name=lambda v: v in self.standard_names
        #     )
        # else:
        #     varnames = None

        ## include download link ##
        self.e.dataset_id = dataset_id
        if self.e.protocol == 'tabledap':
            if self.variables is not None:
                self.e.variables = ["time", "longitude", "latitude", "station"] + self.variables
            # set the same time constraints as before
            self.e.constraints = {'time<=': self.kw['max_time'],
                                  'time>=': self.kw['min_time']}
            download_url = self.e.get_download_url(response='csvp')

        elif self.e.protocol == 'griddap':
            # The search terms that can be input for tabledap do not work for
            # griddap in erddapy currently. Instead, put together an opendap link
            # and then narrow the dataset with xarray.
            # get opendap link
            download_url = self.e.get_download_url(response='opendap')

        # add erddap server name
        return {dataset_id: [self.e.server, download_url] + items + [self.variables]}

    @property
    def meta(self):

        if not hasattr(self, '_meta'):

            if self.parallel:
                # get metadata for datasets; run in parallel to save time
                num_cores = multiprocessing.cpu_count()
                downloads = Parallel(n_jobs=num_cores)(
                    delayed(self.meta_by_dataset)(dataset_id)
                    for dataset_id in self.dataset_ids
                )
            else:
                downloads = []
                for dataset_id in self.dataset_ids:
                    downloads.append(self.meta_by_dataset(dataset_id))

            # make dict from individual dicts
            meta = dict(ChainMap(*downloads))

            # Make dataframe of metadata
            # variable names are the column names for the dataframe
            self._meta = pd.DataFrame.from_dict(
                meta, orient='index',
                columns=['database', 'download_url'] + self.columns + ['variable names'])

        return self._meta

    def data_by_dataset(self, dataset_id):

        download_url = self.meta.loc[dataset_id, 'download_url']
        # data variables in ds that are not the variables we searched for
        # varnames = self.meta.loc[dataset_id, 'variable names']

        if self.e.protocol == 'tabledap':
            try:
                # download_url was found from the metadata above; use it here
                dd = pd.read_csv(download_url, index_col=0, parse_dates=True)

                # Drop cols and rows that are only NaNs.
                dd = dd.dropna(axis='index', how='all').dropna(axis='columns', how='all')

                if self.variables is not None:
                    # Check to see if there is any actual data. This is a bit
                    # convoluted because the column names are the variable names
                    # plus units, so they can't be matched one-to-one.
                    datacols = 0  # number of columns that represent data instead of metadata
                    for col in dd.columns:
                        datacols += [varname in col for varname in self.variables].count(True)
                    # if no datacols, we can skip this one.
                    if datacols == 0:
                        dd = None

            except Exception as e:
                logger_erd.exception(e)
                logger_erd.warning('no data to be read in for %s' % dataset_id)
                dd = None

        elif self.e.protocol == 'griddap':
            try:
                dd = xr.open_dataset(download_url, chunks='auto').sel(
                    time=slice(self.kw['min_time'], self.kw['max_time']))

                if ('min_lat' in self.kw) and ('max_lat' in self.kw):
                    dd = dd.sel(latitude=slice(self.kw['min_lat'], self.kw['max_lat']))

                if ('min_lon' in self.kw) and ('max_lon' in self.kw):
                    dd = dd.sel(longitude=slice(self.kw['min_lon'], self.kw['max_lon']))

                # use variable names to drop other variables (should I do this?)
                if self.variables is not None:
                    unwanted = set(dd.data_vars) - set(self.variables)
                    dd = dd.drop_vars(unwanted)

            except Exception as e:
                logger_erd.exception(e)
                logger_erd.warning('no data to be read in for %s' % dataset_id)
                dd = None

        return (dataset_id, dd)

    @property
    def data(self):

        if not hasattr(self, '_data'):

            if self.parallel:
                num_cores = multiprocessing.cpu_count()
                downloads = Parallel(n_jobs=num_cores)(
                    delayed(self.data_by_dataset)(dataset_id)
                    for dataset_id in self.dataset_ids
                )
            else:
                downloads = []
                for dataset_id in self.dataset_ids:
                    downloads.append(self.data_by_dataset(dataset_id))

            dds = {dataset_id: dd for (dataset_id, dd) in downloads}

            self._data = dds

        return self._data

    def count(self, url):
        try:
            return len(pd.read_csv(url))
        except Exception:
            return np.nan

    def all_variables(self):
        '''Return a list of all possible variables.'''

        file_name_counts = f'erddap_variable_list_{self.known_server}.csv'

        if os.path.exists(file_name_counts):
            return pd.read_csv(file_name_counts, index_col='variable')
        else:
            # This took 10 min running in parallel for ioos, 2 min for coastwatch
            url = f'{self.e.server}/categorize/variableName/index.csv?page=1&itemsPerPage=100000'
            df = pd.read_csv(url)
            num_cores = multiprocessing.cpu_count()
            counts = Parallel(n_jobs=num_cores)(
                delayed(self.count)(url) for url in df.URL
            )
            dfnew = pd.DataFrame()
            dfnew['variable'] = df['Category']
            dfnew['count'] = counts
            dfnew = dfnew.set_index('variable')
            # remove nans
            if (dfnew.isnull().sum() > 0).values:
                dfnew = dfnew[~dfnew.isnull().values].astype(int)
            dfnew.to_csv(file_name_counts)

        return dfnew

    def search_variables(self, variables):
        '''Find valid variable names to use.

        Call with `search_variables()` to return the list of possible names.
        Call with `search_variables('salinity')` to return relevant names.
        '''

        if not isinstance(variables, list):
            variables = [variables]

        # set up search for input variables
        search = "(?i)"
        for variable in variables:
            search += f".*{variable}|"
        search = search.strip('|')

        r = re.compile(search)

        # just get the variable names
        df = self.all_variables()
        parameters = df.index

        matches = list(filter(r.match, parameters))

        # return parameters that match input variable strings
        return df.loc[matches].sort_values('count', ascending=False)

    def check_variables(self, variables, verbose=False):

        if not isinstance(variables, list):
            variables = [variables]

        parameters = list(self.all_variables().index)

        # for a variable to exactly match a parameter, this count should equal 1
        count = []
        for variable in variables:
            count += [parameters.count(variable)]

        condition = np.allclose(count, 1)

        assertion = f'The input variables are not exact matches to ok variables for known_server {self.known_server}. \
\nCheck all parameter group values with `ErddapReader().all_variables()` \
\nor search parameter group values with `ErddapReader().search_variables({variables})`.\
\n\n Try some of the following variables:\n{str(self.search_variables(variables))}'

        assert condition, assertion

        if condition and verbose:
            print('all variables are matches!')
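# Example usage (a sketch, not from the original module). ErddapReader expects
# `approach` and `variables` to be set externally (e.g. by a region/stations
# wrapper), so we set them by hand here; the bounding box and variable name are
# illustrative assumptions.
if __name__ == '__main__':
    reader = ErddapReader(known_server='ioos', parallel=False,
                          kw={'min_lon': -124.0, 'max_lon': -122.0,
                              'min_lat': 47.0, 'max_lat': 49.0,
                              'min_time': '2021-01-01', 'max_time': '2021-01-07'})
    reader.approach = 'region'
    reader.variables = ['sea_water_temperature']
    print(reader.dataset_ids)   # dataset ids matching the region/variable search
    print(reader.meta.head())   # one metadata row per dataset id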
import logging
import os
import re
from math import ceil
from urllib.error import HTTPError  # raised by pd.read_csv for failed requests
from urllib.parse import quote, urlsplit, urlunsplit

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import urllib3
from erddapy import ERDDAP


class GdacClient(object):

    def __init__(self, erddap_url=None):

        self._logger = logging.getLogger(os.path.basename(__file__))

        self._erddap_url = erddap_url or 'https://gliders.ioos.us/erddap'
        self._protocol = 'tabledap'
        self._response_type = 'csv'
        self._items_per_page = 1e10
        self._page = 1
        self._client = ERDDAP(server=self._erddap_url,
                              protocol=self._protocol,
                              response=self._response_type)
        self._last_request = None

        # DataFrame containing the results of ERDDAP advanced search (endpoints, etc.)
        self._datasets_info = pd.DataFrame()
        # DataFrame containing dataset_id, start/end dates, profile count, etc.
        self._datasets_summaries = pd.DataFrame()
        self._datasets_profiles = pd.DataFrame()
        self._datasets_days = pd.DataFrame()

        self._profiles_variables = ['time', 'latitude', 'longitude',
                                    'profile_id', 'wmo_id']

        self._valid_search_kwargs = {'institution', 'ioos_category', 'long_name',
                                     'standard_name', 'variable_name',
                                     'min_lon', 'min_lat', 'max_lon', 'max_lat',
                                     'min_time', 'max_time'}

        self._months = ['January', 'February', 'March', 'April', 'May', 'June',
                        'July', 'August', 'September', 'October', 'November',
                        'December']

        self._calendar_types = ['datasets', 'days', 'profiles']

    @property
    def datasets_info(self):
        return self._datasets_info

    @property
    def datasets_summaries(self):
        return self._datasets_summaries

    @property
    def datasets_profiles(self):
        return self._datasets_profiles

    @property
    def datasets_days(self):
        return self._datasets_days

    @property
    def dataset_ids(self):
        if self._datasets_summaries.empty:
            self._logger.warning('No data sets found')
            return
        return list(self._datasets_info['dataset_id'].values)

    @property
    def gliders(self):
        if self._datasets_summaries.empty:
            self._logger.warning('No data sets found')
            return
        return list(self._datasets_summaries.glider.unique())

    @property
    def profiles_per_yyyymmdd(self):
        return self._datasets_profiles.sum(axis=1)

    @property
    def profiles_per_year(self):
        return self._datasets_profiles.sum(axis=1).groupby(lambda x: x.year).sum()

    @property
    def glider_days_per_yyyymmdd(self):
        return self._datasets_days.sum(axis=1)

    @property
    def glider_days_per_year(self):
        return self._datasets_days.sum(axis=1).groupby(lambda x: x.year).sum()

    @property
    def deployments_per_yyyymmdd(self):
        return self._datasets_days.sum(axis=1)

    @property
    def deployments_per_year(self):
        return self._datasets_days.groupby(lambda x: x.year).any().sum(axis=1)

    @property
    def yearly_counts(self):
        columns = [self.deployments_per_year,
                   self.glider_days_per_year,
                   self.profiles_per_year]
        totals = pd.DataFrame(columns).transpose().astype('i')
        totals.columns = ['deployments', 'glider days', 'profiles']
        totals.index.name = 'year'
        return totals

    @property
    def e(self):
        """erddapy.ERDDAP client"""
        return self._client

    @property
    def server(self):
        return self._client.server

    @property
    def response_type(self):
        return self._client.response

    @response_type.setter
    def response_type(self, response_type):
        self._client.response = response_type

    @property
    def last_request(self):
        return self._last_request

    def get_glider_datasets(self, glider):
        return self._datasets_summaries[
            self._datasets_summaries.glider == glider].reset_index().drop('index', axis=1)

    def get_deployments_calendar(self, year=None):
        if not year:
            return self._datasets_days.groupby(
                [lambda x: x.year, lambda x: x.month]).any().sum(axis=1).unstack()
        else:
            glider_days_by_yymmdd = self._datasets_days
            years = pd.to_datetime(glider_days_by_yymmdd.index).year.unique()
            if year not in years:
                self._logger.warning('No glider days found in year {:}'.format(year))
                return pd.DataFrame()
            return glider_days_by_yymmdd[
                pd.to_datetime(glider_days_by_yymmdd.index).year == year].groupby(
                    [lambda x: x.month, lambda x: x.day]).any().sum(axis=1).unstack()

    def get_glider_days_calendar(self, year=None):
        if not year:
            return self._datasets_days.sum(axis=1).groupby(
                [lambda x: x.year, lambda x: x.month]).sum().unstack()
        else:
            glider_days_by_yymmdd = self._datasets_days.sum(axis=1)
            years = pd.to_datetime(glider_days_by_yymmdd.index).year.unique()
            if year not in years:
                self._logger.warning('No glider days found in year {:}'.format(year))
                return pd.DataFrame()
            return glider_days_by_yymmdd[
                pd.to_datetime(glider_days_by_yymmdd.index).year == year].groupby(
                    [lambda x: x.month, lambda x: x.day]).sum().unstack()

    def get_profiles_calendar(self, year=None):
        if not year:
            return self._datasets_profiles.sum(axis=1).groupby(
                [lambda x: x.year, lambda x: x.month]).sum().unstack()
        else:
            profiles_by_yymmdd = self._datasets_profiles.sum(axis=1)
            years = pd.to_datetime(profiles_by_yymmdd.index).year.unique()
            if year not in years:
                self._logger.warning('No profiles found in year {:}'.format(year))
                return pd.DataFrame()
            return profiles_by_yymmdd[
                pd.to_datetime(profiles_by_yymmdd.index).year == year].groupby(
                    [lambda x: x.month, lambda x: x.day]).sum().unstack()

    def search_datasets(self, search_for=None, delayedmode=False, **kwargs):
        """Search the ERDDAP server for glider deployment datasets. Results are
        stored as pandas DataFrames in:

        self.datasets_info
        self.datasets_summaries

        Equivalent to ERDDAP's Advanced Search. Searches can be performed by free
        text, bounding box, time bounds, etc. See the erddapy documentation for
        valid kwargs."""
        url = self._client.get_search_url(search_for=search_for, **kwargs)
        self._last_request = url

        glider_regex = re.compile(r'^(.*)-\d{8}T\d{4}')
        try:
            self._datasets_info = pd.read_csv(url)
            # Drop the allDatasets row
            self._datasets_info.drop(
                self._datasets_info[
                    self._datasets_info['Dataset ID'] == 'allDatasets'].index,
                inplace=True)
            # Reset the index to start at 0
            self._datasets_info.reset_index(inplace=True)
            # Drop the index, griddap and wms columns
            self._datasets_info.drop(['index', 'griddap', 'wms'], axis=1,
                                     inplace=True)
            # Rename the columns to friendlier names
            columns = {s: s.replace(' ', '_').lower()
                       for s in self._datasets_info.columns}
            self._datasets_info.rename(columns=columns, inplace=True)

            if not delayedmode:
                self._datasets_info = self._datasets_info[
                    ~self._datasets_info.dataset_id.str.endswith('delayed')]

            # Iterate through each data set (except for allDatasets) and grab the info page
            datasets = []
            daily_profiles = []
            datasets_days = []
            for i, row in self._datasets_info.iterrows():

                if row['dataset_id'] == 'allDatasets':
                    continue
                # Keep only delayed-mode datasets when delayedmode=True, and only
                # real-time datasets otherwise.
                if delayedmode and not row['dataset_id'].endswith('delayed'):
                    continue
                elif not delayedmode and row['dataset_id'].endswith('delayed'):
                    continue

                self._logger.info('Fetching dataset: {:}'.format(row['dataset_id']))

                # Get the data download url for the profile variables
                try:
                    data_url = self._client.get_download_url(
                        dataset_id=row['dataset_id'],
                        variables=self._profiles_variables)
                except (ConnectionError, ConnectionRefusedError,
                        urllib3.exceptions.MaxRetryError) as e:
                    self._logger.error('{:} fetch failed: {:}'.format(
                        row['dataset_id'], e))
                    continue

                # Fetch the profiles into a pandas DataFrame
                try:
                    profiles = pd.read_csv(data_url, skiprows=[1],
                                           index_col='time',
                                           parse_dates=True).sort_index()
                except HTTPError as e:
                    self._logger.error('Failed to fetch profiles: {:}'.format(e))
                    continue

                # Group profiles by yyyy-mm-dd and sum the number of profiles per day
                s = profiles.profile_id.dropna().groupby(lambda x: x.date).count()
                s.name = row['dataset_id']
                daily_profiles.append(s)

                # Create the deployment date range
                d_index = pd.date_range(s.index.min(), s.index.max())
                deployment_days = pd.Series([1 for x in d_index], index=d_index,
                                            name=row['dataset_id'])
                datasets_days.append(deployment_days)

                glider_match = glider_regex.match(row['dataset_id'])
                glider = glider_match.groups()[0]

                # First profile time
                dt0 = profiles.index.min()
                # Last profile time
                dt1 = profiles.index.max()
                # Deployment length in days
                days = ceil((dt1 - dt0).total_seconds() / 86400)

                dataset_summary = [glider,
                                   row['dataset_id'],
                                   str(profiles.wmo_id.unique()[0]),
                                   dt0,
                                   dt1,
                                   profiles.iloc[0]['latitude'],
                                   profiles.iloc[0]['longitude'],
                                   profiles.latitude.min(),
                                   profiles.latitude.max(),
                                   profiles.longitude.min(),
                                   profiles.longitude.max(),
                                   profiles.shape[0],
                                   days]
                datasets.append(dataset_summary)

            columns = ['glider', 'dataset_id', 'wmo_id', 'start_date', 'end_date',
                       'deployment_lat', 'deployment_lon', 'lat_min', 'lat_max',
                       'lon_min', 'lon_max', 'num_profiles', 'days']

            self._datasets_summaries = pd.DataFrame(datasets, columns=columns)

            # Create and store the DataFrame containing a 1 on each day the glider
            # was deployed, 0 otherwise
            self._datasets_days = pd.concat(datasets_days, axis=1).sort_index()

            # Create and store the DataFrame containing the number of profiles on
            # each day for each deployment
            self._datasets_profiles = pd.concat(daily_profiles, axis=1).sort_index()

        except HTTPError as e:
            self._logger.error(e)

        return

    def get_dataset_info(self, dataset_id):
        """Fetch the dataset metadata for the specified dataset_id"""

        if dataset_id not in self.dataset_ids:
            self._logger.error('Dataset id {:} not found in {:}'.format(
                dataset_id, self.__repr__()))
            return

        info = self._datasets_info[
            self._datasets_info.dataset_id == dataset_id].reset_index()

        return info.drop('index', axis=1).transpose()

    def get_dataset_profiles(self, dataset_id):
        """Fetch all profiles (time, latitude, longitude, profile_id) for the
        specified dataset. Profiles are sorted by ascending time"""

        if dataset_id not in self.dataset_ids:
            self._logger.error('Dataset id {:} not found in {:}'.format(
                dataset_id, self.__repr__()))
            return

        url = self._client.get_download_url(dataset_id=dataset_id,
                                            variables=self._profiles_variables)

        return pd.read_csv(url, parse_dates=True, skiprows=[1],
                           index_col='time').sort_index()

    def get_dataset_time_coverage(self, dataset_id):
        """Get the time coverage and wmo id (if specified) for the specified
        dataset_id"""

        if dataset_id not in self.dataset_ids:
            self._logger.error('Dataset id {:} not found in {:}'.format(
                dataset_id, self.__repr__()))
            return

        return self._datasets_summaries[
            ['dataset_id', 'start_date', 'end_date', 'wmo_id']
        ].iloc[self.dataset_ids.index(dataset_id)]

    def get_dataset_time_series(self, dataset_id, variables,
                                min_time=None, max_time=None):
        """Fetch the variables time-series for the specified dataset_id. A time
        window can be specified using min_time and max_time, which must be
        ISO-8601 formatted date strings (e.g.: 'YYYY-mm-ddTHH:MM')

        Parameters
        dataset_id: valid dataset id from self.dataset_ids
        variables: list of one or more valid variables in the dataset

        Options
        min_time: minimum time value formatted as 'YYYY-mm-ddTHH:MM[:SS]'
        max_time: maximum time value formatted as 'YYYY-mm-ddTHH:MM[:SS]'
        """
        if dataset_id not in self.dataset_ids:
            self._logger.error('Dataset id {:} not found in {:}'.format(
                dataset_id, self.__repr__()))
            return

        if not isinstance(variables, list):
            variables = [variables]

        all_variables = ['precise_time', 'time', 'depth'] + variables
        variables = set(all_variables)

        constraints = {}
        if min_time:
            constraints['precise_time>='] = min_time
        if max_time:
            constraints['precise_time<='] = max_time

        # Not sure why, but pd.read_csv doesn't like unencoded special characters
        # in data-request urls, so percent-encode them before sending the request.
        data_url = self.encode_url(
            self._client.get_download_url(dataset_id=dataset_id,
                                          variables=variables,
                                          constraints=constraints))

        return pd.read_csv(data_url, skiprows=[1],
                           parse_dates=True).set_index('precise_time').sort_index()

    def plot_yearly_totals(self, totals_type=None, palette='Blues_d', **kwargs):
        """Bar chart plot of deployments, glider days and profiles, grouped by
        year"""
        totals = self.yearly_counts.reset_index()

        if totals_type and totals_type not in totals.columns:
            self._logger.error('Invalid category specified: {:}'.format(totals_type))
            return

        if not totals_type:
            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8.5, 11), sharex=True)

            sns.barplot(x='year', y='deployments', ax=ax1, data=totals,
                        palette=palette, **kwargs)
            sns.barplot(x='year', y='glider days', ax=ax2, data=totals,
                        palette=palette, **kwargs)
            sns.barplot(x='year', y='profiles', ax=ax3, data=totals,
                        palette=palette, **kwargs)

            ax2.set_xlabel('')
            ax1.set_xlabel('')
            ax1.set_title('U.S. IOOS Glider Data Assembly Center')

            return fig, ax1, ax2, ax3
        else:
            ax = sns.barplot(x='year', y=totals_type, data=totals,
                             palette=palette, **kwargs)
            ax.set_title('U.S. IOOS Glider Data Assembly Center')

            return ax.figure, ax

    def plot_datasets_calendar(self, calendar_type, year=None, cmap=None):
        """Heatmap of the specified calendar_type"""
        if calendar_type not in self._calendar_types:
            self._logger.error(
                'Invalid calendar type specified: {:}'.format(calendar_type))
            return

        if calendar_type == 'datasets':
            if not year:
                data = self.get_deployments_calendar()
                title = 'Active Real-Time Datasets'
            else:
                data = self.get_deployments_calendar(year)
                title = 'Active Real-Time Datasets: {:}'.format(year)
        elif calendar_type == 'days':
            if not year:
                data = self.get_glider_days_calendar()
                data.columns = self._months
                title = 'Glider In-Water Days'
            else:
                data = self.get_glider_days_calendar(year)
                title = 'Glider In-Water Days: {:}'.format(year)
        elif calendar_type == 'profiles':
            if not year:
                data = self.get_profiles_calendar()
                data.columns = self._months
                title = 'Real-Time Profiles'
            else:
                data = self.get_profiles_calendar(year)
                title = 'Real-Time Profiles: {:}'.format(year)
        else:
            self._logger.error('Unknown calendar type: {:}'.format(calendar_type))
            return

        if data.empty:
            self._logger.warning('No results found')
            return

        if year:
            data.index = self._months
            plt.figure(figsize=(8.5, 4.))
            cb = True
            annotate = False
        else:
            data.columns = self._months
            plt.figure(figsize=(8.5, 8.5))
            cb = False
            annotate = True

        if cmap:
            ax = sns.heatmap(data, annot=annotate, fmt='.0f', square=True,
                             cbar=cb, linewidths=0.5, cmap=cmap)
        else:
            ax = sns.heatmap(data, annot=annotate, fmt='.0f', square=True,
                             cbar=cb, linewidths=0.5)

        ax.invert_yaxis()
        _ = [ytick.set_rotation(0) for ytick in ax.get_yticklabels()]
        ax.set_title(title)

        return ax

    def plot_dataset_profiles_calendar(self, dataset_id, **heatmap_kwargs):
        """Plot the heatmap profiles/day calendar for the specified dataset"""
        if dataset_id not in self.dataset_ids:
            self._logger.error('Dataset id {:} not found in {:}'.format(
                dataset_id, self.__repr__()))
            return

        profiles = self.get_dataset_profiles(dataset_id)
        if profiles.empty:
            self._logger.warning(
                'No profiles found for dataset: {:}'.format(dataset_id))
            return

        pgroup = profiles.latitude.groupby(
            [lambda x: x.year, lambda x: x.month, lambda x: x.day]).count()
        calendar = pgroup.unstack()

        annotate = True
        square = True
        cbar = False
        annot_kws = {}

        fig = plt.figure(figsize=(11, 8.5))
        ax = sns.heatmap(calendar, annot=annotate, fmt='.0f', square=square,
                         cbar=cbar, linewidths=0.5, annot_kws=annot_kws)

        # Format default y-tick labels to 'mmm YYYY'
        ylabels = [y.get_text() for y in ax.get_yticklabels()]
        new_ylabels = []
        for ylabel in ylabels:
            y, m = ylabel.split('-')
            new_ylabels.append('{:} {:}'.format(self._months[int(m) - 1][0:3], y))
        ax.set_yticklabels(new_ylabels)
        ax.set_ylabel('')
        ax.invert_yaxis()
        _ = [ytick.set_rotation(0) for ytick in ax.get_yticklabels()]
        ax.set_title('Profiles: {:}'.format(dataset_id))

        return ax

    @staticmethod
    def encode_url(data_url):
        """Percent encode special url characters."""
        url_pieces = list(urlsplit(data_url))
        url_pieces[3] = quote(url_pieces[3])

        return urlunsplit(url_pieces)

    def __repr__(self):
        return "<GdacClient(server='{:}', response='{:}', num_datasets={:})>".format(
            self._client.server, self._client.response, len(self._datasets_info))
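# Example usage (a sketch; the glider name and time bounds are illustrative
# assumptions; search kwargs are passed straight through to erddapy's
# get_search_url):
if __name__ == '__main__':
    client = GdacClient()  # defaults to https://gliders.ioos.us/erddap
    client.search_datasets(search_for='ru29',
                           min_time='2020-01-01', max_time='2020-12-31')
    print(client.dataset_ids)
    print(client.yearly_counts)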
    'time',
]


# In[2]:


e = ERDDAP(
    server=server,
    dataset_id=dataset_id,
    constraints=constraints,
    variables=variables,
    protocol='tabledap',
    response='mat',
)

print(e.get_download_url())


# # Obtaining the data
#
# There are a few methods to obtain the data; here we use *to_pandas()* and, in
# the following cell, *to_xarray()*:


# In[3]:


df = e.to_pandas(
    index_col='time',
    parse_dates=True,
    skiprows=(1,)  # units information can be dropped.
).dropna()
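# The next cell is a sketch (not from the original notebook): the same request
# can be loaded lazily as an xarray Dataset via to_xarray(); erddapy requests
# the appropriate response format internally regardless of the `response` set
# above.


# In[4]:


ds = e.to_xarray(decode_times=True)
print(ds)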
import re

import numpy as np
import pandas as pd
from erddapy import ERDDAP


def get_standard_variables_and_metadata(server_link, standard_variable_list):

    # Get access to the server and find datasets associated with the
    # standard_name variables listed
    e = ERDDAP(server=server_link, protocol='tabledap', response='csv')

    # Define a filter for which datasets to look into
    kw = {
        'standard_name': ','.join(standard_variable_list),
        'min_lon': -180.0,
        'max_lon': 180.0,
        'min_lat': -90.0,
        'max_lat': 90.0,
        'min_time': '',
        'max_time': '',
        'cdm_data_type': ''
    }

    variable_to_groupby = [('latitude', 'degrees_north'),
                           ('longitude', 'degrees_east')]

    # Get available datasets from that server
    search_url = e.get_search_url(response='csv', **kw)
    datasets = pd.read_csv(search_url)

    # Print results
    print(e.server)
    print(str(len(datasets)) + " datasets contain " + ', '.join(standard_variable_list))

    # Loop through the datasets and create a metadata DataFrame
    df = pd.DataFrame(columns=['Dataset ID'])

    for index, row in datasets.iterrows():
        # Get info from the dataset (mostly min/max lat/long)
        print(row['Dataset ID'])
        info_url = e.get_info_url(dataset_id=row['Dataset ID'], response='csv')
        info = pd.read_csv(info_url)
        attribute_table = info.set_index(
            ['Row Type', 'Variable Name', 'Attribute Name']).transpose()['attribute']

        # Try to get the distinct lat/long, time and depth ranges for that
        # dataset; if that fails, rely on the ERDDAP metadata
        try:
            # If the dataset is spread out geographically, find distinct locations
            # (may not work well for trajectory data)
            latlong_url = e.get_download_url(
                dataset_id=row['Dataset ID'],
                protocol='tabledap',
                variables=['latitude', 'longitude', 'time'])

            # Append commands to the url to get distinct values, ordered with
            # min and max time for each lat/long
            distinctMinMaxTime_url = latlong_url + \
                '&distinct()&orderByMinMax(%22latitude%2Clongitude%2Ctime%22)'

            # Get lat/long and min/max time for this dataset
            data = pd.read_csv(distinctMinMaxTime_url, header=[0, 1])

            # Group data by latitude/longitude and get min/max values
            data_reduced = data.groupby(by=variable_to_groupby).agg(
                ['min', 'max']).reset_index()

            if info[(info['Variable Name'] == 'depth')].size > 0:
                latlongdepth_url = e.get_download_url(
                    dataset_id=row['Dataset ID'],
                    protocol='tabledap',
                    variables=['latitude', 'longitude', 'depth'])

                # Append commands to the url to get distinct values, ordered with
                # min and max depth for each lat/long
                distinctMinMaxDepth_url = latlongdepth_url + \
                    '&distinct()&orderByMinMax(%22latitude%2Clongitude%2Cdepth%22)'

                # Get lat/long and min/max depth for this dataset
                data_depth = pd.read_csv(distinctMinMaxDepth_url, header=[0, 1])

                # Group depth data by lat/long and get min/max values
                data_depth_reduced = data_depth.groupby(
                    by=variable_to_groupby).agg(['min', 'max']).reset_index()

                # Merge depth values with time
                data_reduced = data_reduced.merge(
                    data_depth_reduced, on=variable_to_groupby, how='left')

            # Merge multi-index column names
            data_reduced.columns = data_reduced.columns.map(' '.join).str.strip(' ')

        except Exception as exception_error:
            print('Failed to read: ' + str(exception_error))

            # If there's only one location, we can get the range from the
            # metadata. Find the lat/long range of this dataset; if it's a single
            # point we don't need to look into the data itself.
            min_latitude = float(attribute_table['NC_GLOBAL', 'geospatial_lat_min'].Value)
            max_latitude = float(attribute_table['NC_GLOBAL', 'geospatial_lat_max'].Value)
            min_longitude = float(attribute_table['NC_GLOBAL', 'geospatial_lon_min'].Value)
            max_longitude = float(attribute_table['NC_GLOBAL', 'geospatial_lon_max'].Value)

            # If min/max lat/long are the same, don't go into the dataset
            if (min_latitude == max_latitude) & (min_longitude == max_longitude):
                data_reduced = {}
                data_reduced['latitude degrees_north'] = min_latitude
                data_reduced['longitude degrees_east'] = min_longitude

                if 'depth' in attribute_table.columns and \
                        'actual_range' in attribute_table['depth'] and \
                        ('m' == attribute_table['depth', 'units']['Value']):
                    depth_range = np.array(
                        str.split(attribute_table['depth', 'actual_range']['Value'], ',')
                    ).astype(float)  # np.float is deprecated; use the builtin
                    data_reduced['depth m min'] = depth_range[0]
                    data_reduced['depth m max'] = depth_range[1]

                # Convert to DataFrame
                data_reduced = pd.DataFrame(data_reduced, index=[0])
                print('Retrieved metadata')
            else:
                # Can't handle data with multiple locations when the data itself
                # can't be retrieved
                continue

        # Add the variable name matching each standard name to the table
        for var in standard_variable_list:
            data_reduced[var] = ','.join(
                e.get_var_by_attr(dataset_id=row['Dataset ID'], standard_name=var))

        # Add cdm_data_type to the table
        data_reduced['cdm_data_type'] = ','.join(
            info[info['Attribute Name'] == 'cdm_data_type']['Value'].values)

        # Add Dataset ID to the table
        data_reduced['Dataset ID'] = row['Dataset ID']

        # Merge that dataset ID with previously downloaded data
        # (DataFrame.append was removed in pandas 2.0)
        df = pd.concat([df, data_reduced], ignore_index=True)

    # Add the server to the DataFrame
    df['server'] = e.server

    # Save the resulting DataFrame to a CSV; the file name is based on the
    # server address
    file_name = re.sub('https*://', '', e.server)
    file_name = re.sub(r'[\./]', '_', file_name)
    file_name = 'Server_List_' + file_name + '.csv'

    print('Save result to ' + file_name)
    df.to_csv(file_name)

    return df
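# Example call (a sketch; the server URL and standard names are illustrative
# assumptions; any tabledap server and CF standard names should work):
if __name__ == '__main__':
    df_meta = get_standard_variables_and_metadata(
        'https://data.ioos.us/gliders/erddap',
        ['sea_water_temperature', 'sea_water_practical_salinity'])
    print(df_meta.head())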
def GOFS_RTOFS_vs_Argo_floats(lon_forec_track, lat_forec_track, lon_forec_cone,
                              lat_forec_cone, lon_best_track, lat_best_track,
                              lon_lim, lat_lim, folder_fig):
    #%% User input

    # GOFS 3.1 output model location
    url_GOFS_ts = 'http://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/ts3z'

    # RTOFS files
    folder_RTOFS = '/home/coolgroup/RTOFS/forecasts/domains/hurricanes/RTOFS_6hourly_North_Atlantic/'

    nc_files_RTOFS = ['rtofs_glo_3dz_f006_6hrly_hvr_US_east.nc',
                      'rtofs_glo_3dz_f012_6hrly_hvr_US_east.nc',
                      'rtofs_glo_3dz_f018_6hrly_hvr_US_east.nc',
                      'rtofs_glo_3dz_f024_6hrly_hvr_US_east.nc']

    # COPERNICUS MARINE ENVIRONMENT MONITORING SERVICE (CMEMS)
    url_cmems = 'http://nrt.cmems-du.eu/motu-web/Motu'
    service_id = 'GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
    product_id = 'global-analysis-forecast-phy-001-024'
    depth_min = '0.493'
    out_dir = '/home/aristizabal/crontab_jobs'

    # Bathymetry file
    bath_file = '/home/aristizabal/bathymetry_files/GEBCO_2014_2D_-100.0_0.0_-10.0_50.0.nc'

    # Argo floats
    url_Argo = 'http://www.ifremer.fr/erddap'

    #%%
    from matplotlib import pyplot as plt
    import numpy as np
    import xarray as xr
    import netCDF4
    from datetime import datetime, timedelta
    import cmocean
    import matplotlib.dates as mdates
    from erddapy import ERDDAP
    import pandas as pd
    import os

    # Do not produce figures on screen
    plt.switch_backend('agg')

    # Increase fontsize of labels globally
    plt.rc('xtick', labelsize=14)
    plt.rc('ytick', labelsize=14)
    plt.rc('legend', fontsize=14)

    #%% Reading bathymetry data
    ncbath = xr.open_dataset(bath_file)
    bath_lat = ncbath.variables['lat'][:]
    bath_lon = ncbath.variables['lon'][:]
    bath_elev = ncbath.variables['elevation'][:]

    oklatbath = np.logical_and(bath_lat >= lat_lim[0], bath_lat <= lat_lim[-1])
    oklonbath = np.logical_and(bath_lon >= lon_lim[0], bath_lon <= lon_lim[-1])

    bath_latsub = bath_lat[oklatbath]
    bath_lonsub = bath_lon[oklonbath]
    bath_elevs = bath_elev[oklatbath, :]
    bath_elevsub = bath_elevs[:, oklonbath]

    #%% Get time bounds for the current day
    ti = datetime.today() - timedelta(1) - timedelta(hours=6)
    tini = datetime(ti.year, ti.month, ti.day)
    te = ti + timedelta(2)
    tend = datetime(te.year, te.month, te.day)

    #%% Look for Argo datasets
    e = ERDDAP(server=url_Argo)

    kw = {
        'min_lon': lon_lim[0],
        'max_lon': lon_lim[1],
        'min_lat': lat_lim[0],
        'max_lat': lat_lim[1],
        'min_time': str(tini),
        'max_time': str(tend),
    }

    search_url = e.get_search_url(response='csv', **kw)

    # Grab the results
    search = pd.read_csv(search_url)

    # Extract the IDs
    dataset = search['Dataset ID'].values

    msg = 'Found {} Datasets:\n\n{}'.format
    print(msg(len(dataset), '\n'.join(dataset)))

    dataset_type = dataset[0]

    constraints = {
        'time>=': str(tini),
        'time<=': str(tend),
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }

    variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    e = ERDDAP(server=url_Argo, protocol='tabledap', response='nc')
    e.dataset_id = dataset_type
    e.constraints = constraints
    e.variables = variables

    print(e.get_download_url())

    df = e.to_pandas(
        parse_dates=True,
        skiprows=(1,)  # units information can be dropped.
    ).dropna()

    argo_ids = np.asarray(df['platform_number'])
    argo_times = np.asarray(df['time (UTC)'])
    argo_press = np.asarray(df['pres (decibar)'])
    argo_lons = np.asarray(df['longitude (degrees_east)'])
    argo_lats = np.asarray(df['latitude (degrees_north)'])
    argo_temps = np.asarray(df['temp (degree_Celsius)'])
    argo_salts = np.asarray(df['psal (PSU)'])

    #%% GOFS 3.1
    try:
        GOFS_ts = xr.open_dataset(url_GOFS_ts, decode_times=False)

        lt_GOFS = np.asarray(GOFS_ts['lat'][:])
        ln_GOFS = np.asarray(GOFS_ts['lon'][:])
        tt = GOFS_ts['time']
        t_GOFS = netCDF4.num2date(tt[:], tt.units)
        depth_GOFS = np.asarray(GOFS_ts['depth'][:])
    except Exception as err:
        print(err)
        GOFS_ts = np.nan
        lt_GOFS = np.nan
        ln_GOFS = np.nan
        depth_GOFS = np.nan
        t_GOFS = ti

    #%% Map Argo floats
    lev = np.arange(-9000, 9100, 100)
    plt.figure()
    plt.contourf(bath_lonsub, bath_latsub, bath_elevsub, lev, cmap=cmocean.cm.topo)
    plt.plot(lon_forec_track, lat_forec_track, '.-', color='gold')
    plt.plot(lon_forec_cone, lat_forec_cone, '.-b', markersize=1)
    plt.plot(lon_best_track, lat_best_track, 'or', markersize=3)

    argo_idd = np.unique(argo_ids)
    for i, id in enumerate(argo_idd):
        okind = np.where(argo_ids == id)[0]
        plt.plot(np.unique(argo_lons[okind]), np.unique(argo_lats[okind]), 's',
                 color='darkorange', markersize=5, markeredgecolor='k')

    plt.title('Argo Floats ' + str(tini)[0:13] + '-' + str(tend)[0:13], fontsize=16)
    plt.axis('scaled')
    plt.xlim(lon_lim[0], lon_lim[1])
    plt.ylim(lat_lim[0], lat_lim[1])

    file = folder_fig + 'ARGO_lat_lon'
    plt.savefig(file, bbox_inches='tight', pad_inches=0.1)

    #%% Figure Argo float vs GOFS, RTOFS and Copernicus
    argo_idd = np.unique(argo_ids)

    for i, id in enumerate(argo_idd):
        print(id)
        okind = np.where(argo_ids == id)[0]
        argo_time = np.asarray([datetime.strptime(t, '%Y-%m-%dT%H:%M:%SZ')
                                for t in argo_times[okind]])
        argo_lon = argo_lons[okind]
        argo_lat = argo_lats[okind]
        argo_pres = argo_press[okind]
        argo_temp = argo_temps[okind]
        argo_salt = argo_salts[okind]

        # GOFS
        print('Retrieving variables from GOFS')
        if isinstance(GOFS_ts, float):
            temp_GOFS = np.nan
            salt_GOFS = np.nan
        else:
            ttGOFS = np.asarray([datetime(t_GOFS[i].year, t_GOFS[i].month,
                                          t_GOFS[i].day, t_GOFS[i].hour)
                                 for i in np.arange(len(t_GOFS))])
            tstamp_GOFS = [mdates.date2num(ttGOFS[i]) for i in np.arange(len(ttGOFS))]
            oktt_GOFS = np.unique(
                np.round(np.interp(mdates.date2num(argo_time[0]), tstamp_GOFS,
                                   np.arange(len(tstamp_GOFS)))).astype(int))[0]
            oklat_GOFS = np.where(lt_GOFS >= argo_lat[0])[0][0]
            oklon_GOFS = np.where(ln_GOFS >= argo_lon[0] + 360)[0][0]
            temp_GOFS = np.asarray(GOFS_ts['water_temp'][oktt_GOFS, :, oklat_GOFS, oklon_GOFS])
            salt_GOFS = np.asarray(GOFS_ts['salinity'][oktt_GOFS, :, oklat_GOFS, oklon_GOFS])

        # RTOFS
        # Time window
        year = int(argo_time[0].year)
        month = int(argo_time[0].month)
        day = int(argo_time[0].day)
        tini = datetime(year, month, day)
        tend = tini + timedelta(days=1)

        # Read RTOFS grid and time
        print('Retrieving coordinates from RTOFS')
        # zero-padded daily folder name, e.g. 'rtofs.20210904'
        fol = 'rtofs.' + tini.strftime('%Y%m%d')

        ncRTOFS = xr.open_dataset(folder_RTOFS + fol + '/' + nc_files_RTOFS[0])
        latRTOFS = np.asarray(ncRTOFS.Latitude[:])
        lonRTOFS = np.asarray(ncRTOFS.Longitude[:])
        depth_RTOFS = np.asarray(ncRTOFS.Depth[:])

        tRTOFS = []
        for t in np.arange(len(nc_files_RTOFS)):
            ncRTOFS = xr.open_dataset(folder_RTOFS + fol + '/' + nc_files_RTOFS[t])
            tRTOFS.append(np.asarray(ncRTOFS.MT[:])[0])

        tRTOFS = np.asarray([mdates.num2date(mdates.date2num(tRTOFS[t]))
                             for t in np.arange(len(nc_files_RTOFS))])

        oktt_RTOFS = np.where(
            mdates.date2num(tRTOFS) >= mdates.date2num(argo_time[0]))[0][0]
        oklat_RTOFS = np.where(latRTOFS[:, 0] >= argo_lat[0])[0][0]
        oklon_RTOFS = np.where(lonRTOFS[0, :] >= argo_lon[0])[0][0]

        nc_file = folder_RTOFS + fol + '/' + nc_files_RTOFS[oktt_RTOFS]
        ncRTOFS = xr.open_dataset(nc_file)
        temp_RTOFS = np.asarray(
            ncRTOFS.variables['temperature'][0, :, oklat_RTOFS, oklon_RTOFS])
        salt_RTOFS = np.asarray(
            ncRTOFS.variables['salinity'][0, :, oklat_RTOFS, oklon_RTOFS])

        # Downloading and reading Copernicus output
        motuc = 'python -m motuclient --motu ' + url_cmems + \
            ' --service-id ' + service_id + \
            ' --product-id ' + product_id + \
            ' --longitude-min ' + str(argo_lon[0] - 2/12) + \
            ' --longitude-max ' + str(argo_lon[0] + 2/12) + \
            ' --latitude-min ' + str(argo_lat[0] - 2/12) + \
            ' --latitude-max ' + str(argo_lat[0] + 2/12) + \
            ' --date-min ' + '"' + str(tini - timedelta(0.5)) + '"' + \
            ' --date-max ' + '"' + str(tend + timedelta(0.5)) + '"' + \
            ' --depth-min ' + depth_min + \
            ' --depth-max ' + str(np.nanmax(argo_pres) + 1000) + \
            ' --variable ' + 'thetao' + ' ' + \
            ' --variable ' + 'so' + ' ' + \
            ' --out-dir ' + out_dir + \
            ' --out-name ' + str(id) + '.nc' + ' ' + \
            ' --user ' + 'maristizabalvar' + ' ' + \
            ' --pwd ' + 'MariaCMEMS2018'

        os.system(motuc)

        # Check if the file was downloaded
        COP_file = out_dir + '/' + str(id) + '.nc'
        resp = os.system('ls ' + COP_file)
        if resp == 0:
            COP = xr.open_dataset(COP_file)
            latCOP = np.asarray(COP.latitude[:])
            lonCOP = np.asarray(COP.longitude[:])
            depth_COP = np.asarray(COP.depth[:])
            tCOP = np.asarray(mdates.num2date(mdates.date2num(COP.time[:])))

            oktimeCOP = np.where(
                mdates.date2num(tCOP) >= mdates.date2num(tini))[0][0]
            oklonCOP = np.where(lonCOP >= argo_lon[0])[0][0]
            oklatCOP = np.where(latCOP >= argo_lat[0])[0][0]

            temp_COP = np.asarray(
                COP.variables['thetao'][oktimeCOP, :, oklatCOP, oklonCOP])
            salt_COP = np.asarray(
                COP.variables['so'][oktimeCOP, :, oklatCOP, oklonCOP])
        else:
            # Download failed: fill with NaNs so the profile plots below still
            # work (the original referenced COP unconditionally here).
            depth_COP = np.empty(1)
            depth_COP[:] = np.nan
            temp_COP = np.empty(1)
            temp_COP[:] = np.nan
            salt_COP = np.empty(1)
            salt_COP[:] = np.nan

        # Figure temp
        plt.figure(figsize=(5, 6))
        plt.plot(argo_temp, -argo_pres, '.-', linewidth=2,
                 label='ARGO Float id ' + str(id))
        plt.plot(temp_GOFS, -depth_GOFS, '.-', linewidth=2,
                 label='GOFS 3.1', color='red')
        plt.plot(temp_RTOFS, -depth_RTOFS, '.-', linewidth=2,
                 label='RTOFS', color='g')
        plt.plot(temp_COP, -depth_COP, '.-', linewidth=2,
                 label='Copernicus', color='darkorchid')
        plt.ylim([-1000, 0])
        plt.title('Temperature Profile on ' + str(argo_time[0])[0:13] +
                  '\n [lon,lat] = [' + str(np.round(argo_lon[0], 3)) + ',' +
                  str(np.round(argo_lat[0], 3)) + ']', fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        plt.xlabel('$^oC$', fontsize=14)
        plt.legend(loc='lower right', fontsize=14)

        file = folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_temp_' + str(id)
        plt.savefig(file, bbox_inches='tight', pad_inches=0.1)

        # Figure salt
        plt.figure(figsize=(5, 6))
        plt.plot(argo_salt, -argo_pres, '.-', linewidth=2,
                 label='ARGO Float id ' + str(id))
        plt.plot(salt_GOFS, -depth_GOFS, '.-', linewidth=2,
                 label='GOFS 3.1', color='red')
        plt.plot(salt_RTOFS, -depth_RTOFS, '.-', linewidth=2,
                 label='RTOFS', color='g')
        plt.plot(salt_COP, -depth_COP, '.-', linewidth=2,
                 label='Copernicus', color='darkorchid')
        plt.ylim([-1000, 0])
        plt.title('Salinity Profile on ' + str(argo_time[0])[0:13] +
                  '\n [lon,lat] = [' + str(np.round(argo_lon[0], 3)) + ',' +
                  str(np.round(argo_lat[0], 3)) + ']', fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        plt.legend(loc='lower right', fontsize=14)

        file = folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_salt_' + str(id)
        plt.savefig(file, bbox_inches='tight', pad_inches=0.1)
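# Example call (a sketch; the track arrays and output folder are illustrative
# assumptions; in operations these come from NHC forecast/best-track files, and
# the RTOFS/bathymetry paths above must exist on the host):
if __name__ == '__main__':
    import numpy as np
    lon_track = np.array([-75.0, -74.0, -73.0])
    lat_track = np.array([25.0, 26.5, 28.0])
    GOFS_RTOFS_vs_Argo_floats(lon_track, lat_track,          # forecast track
                              lon_track, lat_track,          # forecast cone
                              lon_track, lat_track,          # best track
                              [-80.0, -60.0], [20.0, 35.0],  # lon/lat limits
                              '/tmp/figs/')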