Esempio n. 1
0
    def to_dataframe(self):
        """Fetch the Argo index and return it as a post-processed pandas dataframe.

        The CSV at ``self.url`` is read through ``self.fs``. The ``date`` and
        ``date_update`` columns are converted to datetimes, a ``wmo`` column is
        derived from the ``file`` path, and institution/profiler codes are
        translated into labels with the reference dictionaries.

        Returns
        -------
        pandas.DataFrame
            The index with ``institution`` and ``profiler_type`` renamed to
            ``institution_code`` and ``profiler_code``, plus the new ``wmo``,
            ``institution`` and ``profiler`` columns.
        """
        # erddap timestamps look like 2019-03-21T00:00:35Z
        erddap_stamp = "%Y-%m-%dT%H:%M:%SZ"

        def wmo_of(path):
            # The second '/'-separated component of the file path is stored as wmo
            return int(path.split('/')[1])

        # skiprows=[1] drops line index 1, i.e. the line right after the header
        # (presumably the erddap units row -- TODO confirm)
        index = self.fs.read_csv(self.url, parse_dates=True, skiprows=[1])

        for column in ('date', 'date_update'):
            index[column] = pd.to_datetime(index[column], format=erddap_stamp)
        index['wmo'] = index.file.apply(wmo_of)

        # Institutions: keep the raw code under a new name, expose the label
        institutions = load_dict('institutions')

        def institution_label(code):
            return mapp_dict(institutions, code)

        index['tmp1'] = index.institution.apply(institution_label)
        institution_columns = {"institution": "institution_code",
                               "tmp1": "institution"}
        index = index.rename(columns=institution_columns)

        # Profilers: codes are cast to int before the dictionary lookup
        profilers = load_dict('profilers')

        def profiler_label(code):
            return mapp_dict(profilers, int(code))

        index['profiler'] = index.profiler_type.apply(profiler_label)
        index = index.rename(columns={"profiler_type": "profiler_code"})

        return index
Esempio n. 2
0
    def to_dataframe(self):
        """Return the filtered local index as a post-processed pandas dataframe.

        The output of ``self.filter_index()`` is opened with
        ``self.fs.open_dataframe``. A ``wmo`` column is derived from the
        ``file`` path, and institution/profiler codes are translated into
        labels with the reference dictionaries.

        Returns
        -------
        pandas.DataFrame
            The index with ``institution`` and ``profiler_type`` renamed to
            ``institution_code`` and ``profiler_code``, plus the new ``wmo``,
            ``institution`` and ``profiler`` columns.
        """
        filtered = self.filter_index()
        index = self.fs.open_dataframe(filtered)

        def wmo_of(path):
            # The second '/'-separated component of the file path is stored as wmo
            return int(path.split('/')[1])

        index['wmo'] = index['file'].apply(wmo_of)

        # The mapping below is applied for every user.
        # todo: this may need to differ between standard and expert users
        institutions = load_dict('institutions')

        def institution_label(code):
            return mapp_dict(institutions, code)

        index['tmp1'] = index.institution.apply(institution_label)
        institution_columns = {
            "institution": "institution_code",
            "tmp1": "institution",
        }
        index = index.rename(columns=institution_columns)

        # Profiler codes are cast to int before the dictionary lookup
        profilers = load_dict('profilers')

        def profiler_label(code):
            return mapp_dict(profilers, int(code))

        index['profiler'] = index.profiler_type.apply(profiler_label)
        index = index.rename(columns={"profiler_type": "profiler_code"})

        return index
Esempio n. 3
0
    def to_dataframe(self):
        """Filter the local index file and return a pandas dataframe.

        When caching is enabled (``self.cache``) and ``self.cachepath`` exists,
        the cached CSV is loaded and returned. Otherwise the index is filtered
        with ``self.filter_index()``, the file at ``self.filtered_index`` is
        read, the date columns are converted to datetimes, a ``wmo`` column is
        derived from the ``file`` path, and institution/profiler codes are
        replaced by their labels. The result is written to ``self.cachepath``
        when caching is enabled.

        The institution/profiler mapping is best-effort: if it fails, a
        warning is emitted and the raw code columns are left untouched.

        Returns
        -------
        pandas.DataFrame
            The index; ``date`` and ``date_update`` are datetimes whether the
            frame was computed or loaded from the cache.
        """
        import warnings

        date_columns = ['date', 'date_update']

        # Try to load cached file if requested. Dates are parsed on the way
        # in so the cached frame has the same dtypes as a freshly computed one.
        if self.cache and os.path.exists(self.cachepath):
            return pd.read_csv(self.cachepath, parse_dates=date_columns)

        # No cache found or requested, so we compute:
        self.filter_index()
        df = pd.read_csv(self.filtered_index)

        # Create datetime & wmo fields
        # local ftp date format: 20160513065300
        for column in date_columns:
            df[column] = pd.to_datetime(df[column], format="%Y%m%d%H%M%S")
        df['wmo'] = df.file.apply(lambda x: int(x.split('/')[1]))

        # Institution & profiler mapping. The mapped series are built first
        # and only attached once both succeeded, so a failure never leaves
        # temporary or half-mapped columns in the result.
        try:
            institution_dictionnary = load_dict('institutions')
            institution = df.institution.apply(
                lambda x: mapp_dict(institution_dictionnary, x))
            profiler_dictionnary = load_dict('profilers')
            profiler_type = df.profiler_type.apply(
                lambda x: mapp_dict(profiler_dictionnary, x))
        except Exception as error:
            warnings.warn(
                "Could not map institution/profiler codes, "
                "raw codes are kept: {!r}".format(error))
        else:
            # Drop then re-add so the mapped columns end up last
            df = df.drop(columns=['institution', 'profiler_type'])
            df['institution'] = institution
            df['profiler_type'] = profiler_type

        # Possibly save in cache for later re-use
        if self.cache:
            df.to_csv(self.cachepath, index=False)

        return df
Esempio n. 4
0
    def to_dataframe(self):
        """Load the Argo index and return a pandas dataframe.

        When caching is enabled (``self.cache``) and ``self.cachepath`` exists,
        the cached CSV is loaded and returned. Otherwise the CSV at
        ``self.url`` is downloaded, the date columns are converted to
        datetimes, a ``wmo`` column is derived from the ``file`` path, and
        institution/profiler codes are replaced by their labels. The result is
        written to ``self.cachepath`` when caching is enabled.

        The institution/profiler mapping is best-effort: if it fails, a
        warning is emitted and the raw code columns are left untouched.

        Returns
        -------
        pandas.DataFrame
            The index; ``date`` and ``date_update`` are datetimes whether the
            frame was computed or loaded from the cache.
        """
        import warnings

        date_columns = ['date', 'date_update']

        # Try to load cached file if requested. Dates are parsed on the way
        # in so the cached frame has the same dtypes as a freshly computed one.
        if self.cache and os.path.exists(self.cachepath):
            return pd.read_csv(self.cachepath, parse_dates=date_columns)

        # No cache found or requested, so we compute:

        # Download data: get a csv and open it as a pandas dataframe. The
        # response is used as a context manager so the connection is closed
        # even if parsing fails. skiprows=[1] drops the line right after the
        # header (presumably the erddap units row -- TODO confirm).
        with urlopen(self.url) as response:
            df = pd.read_csv(response, parse_dates=True, skiprows=[1])

        # erddap date format : 2019-03-21T00:00:35Z
        for column in date_columns:
            df[column] = pd.to_datetime(df[column],
                                        format="%Y-%m-%dT%H:%M:%SZ")
        df['wmo'] = df.file.apply(lambda x: int(x.split('/')[1]))

        # Institution & profiler mapping. The mapped series are built first
        # and only attached once both succeeded, so a failure never leaves
        # temporary or half-mapped columns in the result.
        try:
            institution_dictionnary = load_dict('institutions')
            institution = df.institution.apply(
                lambda x: mapp_dict(institution_dictionnary, x))
            profiler_dictionnary = load_dict('profilers')
            profiler_type = df.profiler_type.apply(
                lambda x: mapp_dict(profiler_dictionnary, x))
        except Exception as error:
            warnings.warn(
                "Could not map institution/profiler codes, "
                "raw codes are kept: {!r}".format(error))
        else:
            # Drop then re-add so the mapped columns end up last
            df = df.drop(columns=['institution', 'profiler_type'])
            df['institution'] = institution
            df['profiler_type'] = profiler_type

        # Possibly save in cache for later re-use
        if self.cache:
            df.to_csv(self.cachepath, index=False)

        return df
Esempio n. 5
0
def test_invalid_dictionnary_key():
    """Looking up a key absent from the dictionary must yield "Unknown"."""
    profilers = load_dict('profilers')
    label = mapp_dict(profilers, "invalid_key")
    assert label == "Unknown"
Esempio n. 6
0
def test_invalid_dictionnary():
    """Loading a dictionary under an unrecognised name must raise ValueError."""
    unknown_name = "invalid_dictionnary"
    with pytest.raises(ValueError):
        load_dict(unknown_name)