Example #1
    def N_POINTS(self):
        """ Number of rows (data points) the request will return """
        try:
            # ERDDAP exposes an ".ncHeader" view of the same request; its
            # "row = N;" dimension line holds the number of records.
            ncHeader = urlopen(self.url.replace(
                '.csv', '.ncHeader')).read().decode("utf-8")
            row_line = [line for line in ncHeader.splitlines() if 'row = ' in line][0]
            return int(row_line.split('=')[1].split(';')[0])
        except Exception:
            # Header unavailable or unparsable: fall through and return None
            pass
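The trick above is worth isolating: rather than downloading the full CSV payload just to count its rows, the method fetches ERDDAP's lightweight ".ncHeader" companion view of the same request and parses the "row" dimension. A minimal standalone sketch of the same idea (the commented request URL is hypothetical, for illustration only):

from urllib.request import urlopen

def count_rows(csv_url):
    # ERDDAP serves an ".ncHeader" view for any tabledap request; its
    # "row = N;" line gives the number of records the matching ".csv"
    # request would return.
    header = urlopen(csv_url.replace('.csv', '.ncHeader')).read().decode('utf-8')
    row_line = next(line for line in header.splitlines() if 'row = ' in line)
    return int(row_line.split('=')[1].split(';')[0])

# n = count_rows('https://erddap.example.org/erddap/tabledap/ArgoFloats.csv?temp')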
Example #2
    def to_xarray(self):
        """ Load Argo data and return a xarray.DataSet """

        # Try to load cached file if requested:
        if self.cache and os.path.exists(self.cachepath):
            ds = xr.open_dataset(self.cachepath)
            ds = ds.argo.cast_types() # Cast data types
            return ds
        # No cache found or requested, so we compute:

        # Download data: get a csv, open it as pandas dataframe, convert it to xarray dataset
        df = pd.read_csv(urlopen(self.url), parse_dates=['time'], skiprows=[1], dtype=self._dtype)
        df['time'] = df['time'].dt.tz_localize(None)  # Drop time zone info: ERDDAP times are UTC
        ds = xr.Dataset.from_dataframe(df)

        # Post-process the xarray.DataSet:

        # Set coordinates:
        coords = ('latitude', 'longitude', 'time')
        # Convert all coordinate variable names to upper case
        for v in ds.data_vars:
            if v not in coords:
                ds = ds.rename({v: v.upper()})
        ds = ds.set_coords(coords)

        # Cast data types and add variable attributes (not available in the csv download):
        ds = ds.argo.cast_types()
        ds = self._add_attributes(ds)

        # More naming conventions could be applied here, e.g.:
        #         ds = ds.rename({'pres': 'pressure'})

        # Add useful attributes to the dataset:
        if self.dataset_id == 'phy':
            ds.attrs['DATA_ID'] = 'ARGO'
        elif self.dataset_id == 'ref':
            ds.attrs['DATA_ID'] = 'ARGO_Reference'
        elif self.dataset_id == 'bgc':
            ds.attrs['DATA_ID'] = 'ARGO-BGC'
        ds.attrs['DOI'] = 'http://doi.org/10.17882/42182'
        ds.attrs['Downloaded_from'] = self.erddap.server
        ds.attrs['Downloaded_by'] = getpass.getuser()
        ds.attrs['Download_date'] = pd.to_datetime('now').strftime('%Y/%m/%d')
        ds.attrs['Download_url'] = self.url
        ds.attrs['Download_constraints'] = self.cname()
        ds = ds[np.sort(ds.data_vars)]  # Sort variables alphabetically

        # Possibly save in cache for later re-use
        if self.cache:
            ds.attrs['cache'] = self.cachepath
            ds.to_netcdf(self.cachepath)

        return ds
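These excerpts assume the enclosing module imports os, getpass, numpy as np, pandas as pd, xarray as xr, and urllib.request's urlopen. The core pipeline of this method (pandas DataFrame to xarray Dataset, upper-casing of measurement variables, promotion of position/time columns to coordinates) can be reproduced offline; a minimal sketch with synthetic data standing in for the ERDDAP CSV payload:

import pandas as pd
import xarray as xr

# Synthetic stand-in for the ERDDAP CSV payload:
df = pd.DataFrame({
    'time': pd.to_datetime(['2019-03-21T00:00:35Z', '2019-03-21T01:10:00Z']),
    'latitude': [45.0, 45.1],
    'longitude': [-30.0, -30.1],
    'temp': [12.5, 12.7],
})
df['time'] = df['time'].dt.tz_localize(None)  # ERDDAP times are UTC; drop tz info

ds = xr.Dataset.from_dataframe(df)
coords = ('latitude', 'longitude', 'time')
for v in list(ds.data_vars):  # Upper-case measurement variables only
    if v not in coords:
        ds = ds.rename({v: v.upper()})
ds = ds.set_coords(coords)
print(ds)  # TEMP is a data variable; latitude/longitude/time are coordinates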
Example #3
    def to_dataframe(self):
        """ Load Argo index and return a pandas dataframe """

        # Try to load cached file if requested:
        if self.cache and os.path.exists(self.cachepath):
            df = pd.read_csv(self.cachepath)
            return df
        # No cache found or requested, so we compute:

        # Download data: get a csv, open it as pandas dataframe, create wmo field
        df = pd.read_csv(urlopen(self.url), parse_dates=True, skiprows=[1])
        # erddap date format : 2019-03-21T00:00:35Z
        df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%dT%H:%M:%SZ")
        df['date_update'] = pd.to_datetime(df['date_update'],
                                           format="%Y-%m-%dT%H:%M:%SZ")
        df['wmo'] = df.file.apply(lambda x: int(x.split('/')[1]))
        # Map institution & profiler codes to human-readable names:
        try:
            institution_dictionary = load_dict('institutions')
            df['tmp1'] = df.institution.apply(
                lambda x: mapp_dict(institution_dictionary, x))
            profiler_dictionary = load_dict('profilers')
            df['tmp2'] = df.profiler_type.apply(
                lambda x: mapp_dict(profiler_dictionary, x))

            df = df.drop(columns=['institution', 'profiler_type'])
            df = df.rename(columns={
                "tmp1": "institution",
                "tmp2": "profiler_type"
            })
        except Exception:
            # Mapping tables unavailable: keep the raw codes
            pass
        # Possibly save in cache for later re-use
        if self.cache:
            df.to_csv(self.cachepath, index=False)

        return df
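The wmo extraction relies on the layout of the Argo GDAC index, where each 'file' entry reads "<dac>/<wmo>/profiles/<profile>.nc", so the float's WMO number is the second path component. A minimal sketch with two synthetic index rows:

import pandas as pd

# Synthetic stand-in for two Argo index rows:
df = pd.DataFrame({
    'file': ['aoml/13857/profiles/R13857_001.nc',
             'coriolis/6902746/profiles/R6902746_001.nc'],
    'date': ['2019-03-21T00:00:35Z', '2019-03-22T10:00:00Z'],
})
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%dT%H:%M:%SZ')
df['wmo'] = df['file'].apply(lambda x: int(x.split('/')[1]))  # second path component
print(df[['wmo', 'date']])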