import os
import getpass
from urllib.request import urlopen

import numpy as np
import pandas as pd
import xarray as xr

from .utilities import load_dict, mapp_dict  # assumed location of the dictionary helpers


def N_POINTS(self):
    """ Number of measurement points expected for this request, read from the erddap '.ncHeader' """
    try:
        ncHeader = urlopen(self.url.replace('.csv', '.ncHeader')).read().decode("utf-8")
        line = [line for line in ncHeader.splitlines() if 'row = ' in line][0]
        return int(line.split('=')[1].split(';')[0])
    except Exception:
        # Header unavailable or malformed: the number of points is unknown
        return None
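# The '.ncHeader' endpoint returns the dataset header as plain text, and the
# parsing above extracts the "row" dimension from it. An illustrative fragment
# (hypothetical value):
#
#   dimensions:
#     row = 1234;
#
# so that ``int(line.split('=')[1].split(';')[0])`` evaluates to 1234.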
def to_xarray(self):
    """ Load Argo data and return a xarray.Dataset """
    # Try to load cached file if requested:
    if self.cache and os.path.exists(self.cachepath):
        ds = xr.open_dataset(self.cachepath)
        ds = ds.argo.cast_types()  # Cast data types
        return ds
    # No cache found or requested, so we compute:

    # Download data: get a csv, open it as a pandas dataframe, convert it to a xarray dataset
    df = pd.read_csv(urlopen(self.url), parse_dates=['time'], skiprows=[1], dtype=self._dtype)
    df['time'] = df['time'].dt.tz_localize(None)  # Remove time zone information, we are in UTC
    ds = xr.Dataset.from_dataframe(df)

    # Post-process the xarray.Dataset:

    # Set coordinates:
    coords = ('latitude', 'longitude', 'time')
    # Convert all data variable names (i.e. non-coordinates) to upper case:
    for v in ds.data_vars:
        if v not in coords:
            ds = ds.rename({v: v.upper()})
    ds = ds.set_coords(coords)

    # Cast data types and add variable attributes (not available in the csv download):
    ds = ds.argo.cast_types()
    ds = self._add_attributes(ds)

    # More convention:
    # ds = ds.rename({'pres': 'pressure'})

    # Add useful attributes to the dataset:
    if self.dataset_id == 'phy':
        ds.attrs['DATA_ID'] = 'ARGO'
    elif self.dataset_id == 'ref':
        ds.attrs['DATA_ID'] = 'ARGO_Reference'
    elif self.dataset_id == 'bgc':
        ds.attrs['DATA_ID'] = 'ARGO-BGC'
    ds.attrs['DOI'] = 'http://doi.org/10.17882/42182'
    ds.attrs['Downloaded_from'] = self.erddap.server
    ds.attrs['Downloaded_by'] = getpass.getuser()
    ds.attrs['Download_date'] = pd.to_datetime('now').strftime('%Y/%m/%d')
    ds.attrs['Download_url'] = self.url
    ds.attrs['Download_constraints'] = self.cname()
    ds = ds[np.sort(ds.data_vars)]  # Sort variables alphabetically

    # Possibly save in cache for later re-use:
    if self.cache:
        ds.attrs['cache'] = self.cachepath
        ds.to_netcdf(self.cachepath)

    return ds
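# Usage sketch (illustrative; assumes ``fetcher`` is a configured instance of
# this class, e.g. with dataset_id='phy' and valid download constraints):
#
#   >>> ds = fetcher.to_xarray()
#   >>> ds.attrs['DATA_ID']
#   'ARGO'
#   >>> ds.attrs['Download_url'] == fetcher.url
#   True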
def to_dataframe(self):
    """ Load Argo index and return a pandas dataframe """
    # Try to load cached file if requested:
    if self.cache and os.path.exists(self.cachepath):
        df = pd.read_csv(self.cachepath)
        return df
    # No cache found or requested, so we compute:

    # Download data: get a csv, open it as a pandas dataframe, create the wmo field
    df = pd.read_csv(urlopen(self.url), parse_dates=True, skiprows=[1])
    # erddap date format: 2019-03-21T00:00:35Z
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%dT%H:%M:%SZ")
    df['date_update'] = pd.to_datetime(df['date_update'], format="%Y-%m-%dT%H:%M:%SZ")
    df['wmo'] = df.file.apply(lambda x: int(x.split('/')[1]))

    # Institution and profiler mapping:
    try:
        institution_dictionary = load_dict('institutions')
        df['tmp1'] = df.institution.apply(lambda x: mapp_dict(institution_dictionary, x))
        profiler_dictionary = load_dict('profilers')
        df['tmp2'] = df.profiler_type.apply(lambda x: mapp_dict(profiler_dictionary, x))
        df = df.drop(columns=['institution', 'profiler_type'])
        df = df.rename(columns={"tmp1": "institution", "tmp2": "profiler_type"})
    except Exception:
        # If the mapping dictionaries are unavailable, keep the raw codes:
        pass

    # Possibly save in cache for later re-use:
    if self.cache:
        df.to_csv(self.cachepath, index=False)

    return df
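# Usage sketch (illustrative; assumes ``indexfetcher`` is a configured
# instance of this index fetcher):
#
#   >>> df = indexfetcher.to_dataframe()
#   >>> df[['wmo', 'date', 'institution', 'profiler_type']].head()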