Example #1
0
    def process(self):
        """Obtain WOA lookup results for each float and log its mean gain.

        The floats processed are ``self.args.wmo`` when given, otherwise the
        ``wmo`` column of ``ArgoData.get_cache_file_oxy_count_df()``.  For each
        float the results are read from the HDF file ``self.args.results_file``
        under the key ``/WOA_WMO_<wmo>``.  When that key is missing, the float's
        data is fetched with ``get_float_dataframe()``, passed through
        ``self.woa_lookup()`` and stored under the key.
        """
        self.logger.setLevel(self._log_levels[self.args.verbose])
        ad = ArgoData(verbosity=self.args.verbose, cache_file=self.args.cache_file)

        if self.args.wmo:
            wmo_list = self.args.wmo
        else:
            wmo_list = ad.get_cache_file_oxy_count_df()["wmo"].tolist()

        self.logger.info("Reading float profile data from %s", self.args.cache_file)
        for i, wmo in enumerate(wmo_list):
            # Key under which this float's results are stored in results_file
            key = "/WOA_WMO_{}".format(wmo)
            self.logger.info("WMO_%s: Float %s of %s", wmo, i + 1, len(wmo_list))
            try:
                with pd.HDFStore(self.args.results_file) as s:
                    # The stored results come from results_file, not the cache file
                    self.logger.debug("Reading %s from %s", key, self.args.results_file)
                    wmo_gdf = s.get(key)
                    self.logger.debug("Done.")
            except KeyError:
                # No stored results for this float yet: compute them
                df = ad.get_float_dataframe(
                    [wmo], max_profiles=self.args.profiles, max_pressure=self.args.pressure, update_cache=False
                )
                wmo_gdf = self.woa_lookup(df)

                # Save intermediate results to HDF file so that the script can
                # pick up where it left off following network or other problems
                with pd.HDFStore(self.args.results_file) as s:
                    s.put(key, wmo_gdf)

            if not wmo_gdf.dropna().empty:
                self.logger.debug("wmo_gdf head: %s", wmo_gdf.head())
                self.logger.info("Gain for %s = %s", wmo, wmo_gdf.groupby("wmo").gain.mean().values[0])
Example #2
0
    def process(self):
        """Load float profile data into the cache file and print a summary.

        The cache file name is ``self.args.cache_file`` when given, otherwise
        ``self.short_cache_file()``.  It is placed in ``self.args.cache_dir``
        when given, otherwise in the user's home directory.  Floats are selected
        with ``self.args.age`` when it is set, otherwise taken from
        ``self.args.wmo``.

        Raises:
            ValueError: if neither ``self.args.age`` nor ``self.args.wmo`` is
                set, because there would be no floats to load.
        """
        args = self.args

        # Fail fast with a clear message; previously this case surfaced as an
        # UnboundLocalError on wmo_list after the cache file had been opened.
        if not (args.age or args.wmo):
            raise ValueError('No floats selected: specify either age or wmo')

        cache_file = args.cache_file or self.short_cache_file()
        cache_dir = args.cache_dir or expanduser('~')
        cache_file = abspath(join(cache_dir, cache_file))

        print(('Loading cache file {}').format(cache_file))
        ad = ArgoData(verbosity=args.verbose, cache_file=cache_file,
                      bio_list=args.bio_list, variables=args.variables)

        # age takes precedence over an explicit wmo list
        if args.age:
            wmo_list = ad.get_oxy_floats_from_status(age_gte=args.age)
        else:
            wmo_list = args.wmo

        ad.get_float_dataframe(wmo_list, max_profiles=args.profiles,
                               max_pressure=args.pressure,
                               append_df=False)

        # After loading add lookup information to the cache file
        df = ad.get_cache_file_oxy_count_df(max_profiles=args.profiles, flush=True)
        print(('{} floats appear to have valid oxygen data').format(len(df)))
        print(('Finished loading cache file {}').format(cache_file))
Example #3
0
    def process(self):
        """Obtain WOA lookup results for each float and log its mean gain.

        The floats processed are ``self.args.wmo`` when given, otherwise the
        ``wmo`` column of ``ArgoData.get_cache_file_oxy_count_df()``.  For each
        float the results are read from the HDF file ``self.args.results_file``
        under the key ``/WOA_WMO_<wmo>``.  When that key is missing, the float's
        data is fetched with ``get_float_dataframe()`` and passed through
        ``self.woa_lookup()``; the result is stored under the key only if it
        still has rows after ``dropna()``, otherwise a warning is logged.
        """
        self.logger.setLevel(self._log_levels[self.args.verbose])
        ad = ArgoData(verbosity=self.args.verbose,
                      cache_file=self.args.cache_file)

        if self.args.wmo:
            wmo_list = self.args.wmo
        else:
            wmo_list = ad.get_cache_file_oxy_count_df()['wmo'].tolist()

        self.logger.info('Reading float profile data from %s',
                         self.args.cache_file)
        for i, wmo in enumerate(wmo_list):
            # Key under which this float's results are stored in results_file
            key = '/WOA_WMO_{}'.format(wmo)
            self.logger.info('WMO_%s: Float %s of %s', wmo, i + 1,
                             len(wmo_list))
            try:
                with pd.HDFStore(self.args.results_file) as s:
                    # The stored results come from results_file, not the cache file
                    self.logger.debug('Reading %s from %s', key,
                                      self.args.results_file)
                    wmo_gdf = s.get(key)
                    self.logger.debug('Done.')
            except KeyError:
                # No stored results for this float yet: compute them
                df = ad.get_float_dataframe([wmo],
                                            max_profiles=self.args.profiles,
                                            max_pressure=self.args.pressure,
                                            update_cache=False)
                wmo_gdf = self.woa_lookup(df)

                if not wmo_gdf.dropna().empty:
                    # Save intermediate results to HDF file so that the script can
                    # pick up where it left off following network or other problems
                    with pd.HDFStore(self.args.results_file) as s:
                        s.put(key, wmo_gdf)
                else:
                    # Logger.warn() is deprecated; warning() is the supported name
                    self.logger.warning('Empty DataFrame for wmo %s', wmo)

            if not wmo_gdf.dropna().empty:
                self.logger.debug('wmo_gdf head: %s', wmo_gdf.head())
                self.logger.info('Gain for %s = %s', wmo,
                                 wmo_gdf.groupby('wmo').gain.mean().values[0])
Example #4
0
    def process(self):
        """Load float profile data into the cache file and print a summary.

        The cache file name is ``self.args.cache_file`` when given, otherwise
        ``self.short_cache_file()``.  It is placed in ``self.args.cache_dir``
        when given, otherwise in the user's home directory.  Floats are selected
        with ``self.args.age`` when it is set, otherwise taken from
        ``self.args.wmo``.

        Raises:
            ValueError: if neither ``self.args.age`` nor ``self.args.wmo`` is
                set, because there would be no floats to load.
        """
        args = self.args

        # Fail fast with a clear message; previously this case surfaced as an
        # UnboundLocalError on wmo_list after the cache file had been opened.
        if not (args.age or args.wmo):
            raise ValueError('No floats selected: specify either age or wmo')

        cache_file = args.cache_file or self.short_cache_file()
        cache_dir = args.cache_dir or expanduser('~')
        cache_file = abspath(join(cache_dir, cache_file))

        print(('Loading cache file {}').format(cache_file))
        ad = ArgoData(verbosity=args.verbose,
                      cache_file=cache_file,
                      bio_list=args.bio_list,
                      variables=args.variables)

        # age takes precedence over an explicit wmo list
        if args.age:
            wmo_list = ad.get_oxy_floats_from_status(age_gte=args.age)
        else:
            wmo_list = args.wmo

        ad.get_float_dataframe(wmo_list,
                               max_profiles=args.profiles,
                               max_pressure=args.pressure,
                               append_df=False)

        # After loading add lookup information to the cache file
        df = ad.get_cache_file_oxy_count_df(max_profiles=args.profiles,
                                            flush=True)
        print(('{} floats appear to have valid oxygen data').format(len(df)))
        print(('Finished loading cache file {}').format(cache_file))
Example #5
0
    def test_fixed_cache_file(self):
        """Load float data through a cache file named by ArgoDataLoader.

        The cache file path is built from ``ArgoDataLoader.short_cache_file()``
        inside the ``biofloat`` directory next to this file's parent, then
        ``get_float_dataframe()`` is called with and without ``max_profiles``.
        """
        min_age = 3000  # Returns 1 float on 2 November 2015
        here = os.path.dirname(__file__)
        parent_dir = os.path.join(here, "../")

        # Simulate what's done by load_biofloat_cache.py
        from scripts.load_biofloat_cache import ArgoDataLoader
        from argparse import Namespace
        loader = ArgoDataLoader()
        loader.args = Namespace(age=min_age, profiles=1)
        relative_path = os.path.join(parent_dir, 'biofloat',
                                     loader.short_cache_file())
        cache_file = os.path.abspath(relative_path)

        argo = ArgoData(verbosity=1, cache_file=cache_file)
        floats = argo.get_oxy_floats_from_status(age_gte=min_age)
        # Force limiting to what's in cache_file name: 1
        argo.get_float_dataframe(floats, max_profiles=2)
        # Force using maximum value
        argo.get_float_dataframe(floats)
Example #6
0
    def test_fixed_cache_file(self):
        """Exercise get_float_dataframe() against a loader-named cache file.

        Mirrors how load_biofloat_cache.py names its cache file: the name comes
        from ``ArgoDataLoader.short_cache_file()`` with ``age=3000`` and
        ``profiles=1`` set on the loader's ``args``.
        """
        age = 3000      # Returns 1 float on 2 November 2015

        # Simulate what's done by load_biofloat_cache.py
        from scripts.load_biofloat_cache import ArgoDataLoader
        from argparse import Namespace
        adl = ArgoDataLoader()
        adl.args = Namespace(age=age, profiles=1)

        parent_dir = os.path.join(os.path.dirname(__file__), "../")
        biofloat_path = os.path.join(parent_dir, 'biofloat',
                                     adl.short_cache_file())
        cache_file = os.path.abspath(biofloat_path)

        data = ArgoData(verbosity=1, cache_file=cache_file)
        wmos = data.get_oxy_floats_from_status(age_gte=age)
        # Force limiting to what's in cache_file name: 1
        data.get_float_dataframe(wmos, max_profiles=2)
        # Force using maximum value
        data.get_float_dataframe(wmos)
Example #7
0
 def test_cache_file(self):
     """Construct ArgoData with an explicit cache file and set verbosity to 1."""
     cache_path = '/tmp/biofloat_cache_file.hdf'
     argo = ArgoData(cache_file=cache_path)
     argo.set_verbosity(1)
Example #8
0
 def setUp(self):
     """Create the shared ArgoData instance and float lists, then build the cache.

     NOTE(review): 'good' and 'bad' presumably mean floats with and without
     usable data; confirm against the tests that use them.
     """
     self.ad = ArgoData(verbosity=1)
     self.good_oga_floats, self.bad_oga_floats = ['1900650'], ['6901464']
     self._build_default_cache()
Example #9
0
class DataTest(unittest.TestCase):
    def setUp(self):
        self.ad = ArgoData(verbosity=1)
        self.good_oga_floats = ['1900650']
        self.bad_oga_floats = ['6901464']
        self._build_default_cache()

    def test_get_biofloats(self):
        self.oga_floats = self.ad.get_oxy_floats_from_status()
        self.assertNotEqual(len(self.oga_floats), 0)

    def _get_dac_urls(self):
        # Testing with a float that has data
        for dac_url in self.ad.get_dac_urls(self.good_oga_floats).values():
            self.dac_url = dac_url
            self.assertTrue(self.dac_url.startswith('http'))
            break

    def _get_profile_opendap_urls(self):
        for profile_url in self.ad.get_profile_opendap_urls(self.dac_url):
            self.profile_url = profile_url
            break

    def _profile_to_dataframe(self):
        key, code = self.ad._float_profile_key(self.profile_url)
        d = self.ad._profile_to_dataframe(self.good_oga_floats[0],
                                          self.profile_url, key, 11000)
        self.assertNotEqual(len(d), 0)

    def _build_default_cache(self):
        # Methods need to be called in order
        self._get_dac_urls()
        self._get_profile_opendap_urls()
        self._profile_to_dataframe()

    def test_get_float_dataframe(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_float_dataframe(self.bad_oga_floats, max_profiles=2)
        self.assertEqual(len(df.dropna()), 0)

    def test_cache_file(self):
        ad = ArgoData(cache_file='/tmp/biofloat_cache_file.hdf')
        ad.set_verbosity(1)

    def test_fixed_cache_file(self):
        age = 3000  # Returns 1 float on 2 November 2015
        parent_dir = os.path.join(os.path.dirname(__file__), "../")

        # Simulated what's done by load_biofloat_cache.py
        from scripts.load_biofloat_cache import ArgoDataLoader
        from argparse import Namespace
        adl = ArgoDataLoader()
        adl.args = Namespace(age=3000, profiles=1)
        cache_file = os.path.abspath(
            os.path.join(parent_dir, 'biofloat', adl.short_cache_file()))

        ad = ArgoData(verbosity=1, cache_file=cache_file)
        wmo_list = ad.get_oxy_floats_from_status(age_gte=age)
        # Force limiting to what's in cache_file name: 1
        ad.get_float_dataframe(wmo_list, max_profiles=2)
        # Force using maximum value
        ad.get_float_dataframe(wmo_list)

    def test_util_o2sat(self):
        # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html
        self.assertAlmostEqual(utils.o2sat(35, 5), 308, places=0)
        self.assertAlmostEqual(utils.o2sat(35, 20), 225, places=0)
        self.assertAlmostEqual(utils.o2sat(35, 30), 190, places=0)

    def test_util_convert_to_mll(self):
        # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html
        # and http://www.oceanographers.net/forums/showthread.php?1486-ask-how-to-conversion-ml-L-to-%B5mol-kg
        self.assertAlmostEqual(utils.convert_to_mll(308, 35, 5, 0),
                               7.1,
                               places=1)
        self.assertAlmostEqual(utils.convert_to_mll(225.6, 36.5, 1, 0),
                               5.2,
                               places=1)

    def test_get_bio_profile_index(self):
        df = self.ad.get_bio_profile_index()
        self.assertNotEqual(len(df), 0)

    def test_get_cache_file_all_wmo_list(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_all_wmo_list(flush=True)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_all_wmo_list()
        self.assertNotEqual(len(df), 0)

    def test_get_cache_file_oxy_wmo_list(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_oxy_count_df(max_profiles=2, flush=True)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_oxy_count_df(max_profiles=2)
        self.assertNotEqual(len(df), 0)

    def test_get_update_datetime(self):
        # Any random delayed mode file will do...
        dt = self.ad._get_update_datetime(
            'http://tds0.ifremer.fr/thredds/dodsC/CORIOLIS-ARGO-GDAC-OBS/aoml/5901336/profiles/D5901336_082.nc'
        )

    def test_to_odv(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        converters.to_odv(df, 'biofloat_data.txt')

    def test_update_cache_false(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats,
                                         max_profiles=2,
                                         update_cache=False)
        self.assertNotEqual(len(df), 0)

    def test_remove_df(self):
        self.ad._remove_df(self.ad._GLOBAL_META)
Example #10
0
 def test_cache_file(self):
     """Create an ArgoData backed by a fixed cache file, then set verbosity to 1."""
     ArgoData(cache_file='/tmp/biofloat_cache_file.hdf').set_verbosity(1)
Example #11
0
 def setUp(self):
     """Prepare the ArgoData instance and float lists, then build the default cache.

     NOTE(review): 'good' and 'bad' presumably mean floats with and without
     usable data; confirm against the tests that use them.
     """
     argo = ArgoData(verbosity=1)
     self.ad = argo
     self.good_oga_floats = ['1900650']
     self.bad_oga_floats = ['6901464']
     # Must come last: the cache helpers read the attributes set above
     self._build_default_cache()
Example #12
0
class DataTest(unittest.TestCase):
    def setUp(self):
        self.ad = ArgoData(verbosity=1)
        self.good_oga_floats = ['1900650']
        self.bad_oga_floats = ['6901464']
        self._build_default_cache()

    def test_get_biofloats(self):
        self.oga_floats = self.ad.get_oxy_floats_from_status()
        self.assertNotEqual(len(self.oga_floats), 0)

    def _get_dac_urls(self):
        # Testing with a float that has data
        for dac_url in self.ad.get_dac_urls(self.good_oga_floats).values():
            self.dac_url = dac_url
            self.assertTrue(self.dac_url.startswith('http'))
            break

    def _get_profile_opendap_urls(self):
        for profile_url in self.ad.get_profile_opendap_urls(self.dac_url):
            self.profile_url = profile_url
            break

    def _profile_to_dataframe(self):
        key, code = self.ad._float_profile_key(self.profile_url)
        d = self.ad._profile_to_dataframe(self.good_oga_floats[0], 
                self.profile_url, key, 11000)
        self.assertNotEqual(len(d), 0)

    def _build_default_cache(self):
        # Methods need to be called in order
        self._get_dac_urls()
        self._get_profile_opendap_urls()
        self._profile_to_dataframe()

    def test_get_float_dataframe(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_float_dataframe(self.bad_oga_floats, max_profiles=2)
        self.assertEqual(len(df.dropna()), 0)

    def test_cache_file(self):
        ad = ArgoData(cache_file='/tmp/biofloat_cache_file.hdf')
        ad.set_verbosity(1)

    def test_fixed_cache_file(self):
        age = 3000      # Returns 1 float on 2 November 2015
        parent_dir = os.path.join(os.path.dirname(__file__), "../")

        # Simulated what's done by load_biofloat_cache.py
        from scripts.load_biofloat_cache import ArgoDataLoader
        from argparse import Namespace
        adl = ArgoDataLoader()
        adl.args = Namespace(age=3000, profiles=1)
        cache_file = os.path.abspath(
                     os.path.join(parent_dir, 'biofloat', adl.short_cache_file()))

        ad = ArgoData(verbosity=1, cache_file=cache_file)
        wmo_list = ad.get_oxy_floats_from_status(age_gte=age)
        # Force limiting to what's in cache_file name: 1
        ad.get_float_dataframe(wmo_list, max_profiles=2)
        # Force using maximum value
        ad.get_float_dataframe(wmo_list)

    def test_util_o2sat(self):
        # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html
        self.assertAlmostEqual(utils.o2sat(35, 5), 308, places=0)
        self.assertAlmostEqual(utils.o2sat(35, 20), 225, places=0)
        self.assertAlmostEqual(utils.o2sat(35, 30), 190, places=0)

    def test_util_convert_to_mll(self):
        # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html
        # and http://www.oceanographers.net/forums/showthread.php?1486-ask-how-to-conversion-ml-L-to-%B5mol-kg
        self.assertAlmostEqual(utils.convert_to_mll(308, 35, 5, 0), 7.1, places=1)
        self.assertAlmostEqual(utils.convert_to_mll(225.6, 36.5, 1, 0), 5.2, places=1)

    def test_get_bio_profile_index(self):
        df = self.ad.get_bio_profile_index()
        self.assertNotEqual(len(df), 0)

    def test_get_cache_file_all_wmo_list(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_all_wmo_list(flush=True)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_all_wmo_list()
        self.assertNotEqual(len(df), 0)

    def test_get_cache_file_oxy_wmo_list(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_oxy_count_df(max_profiles=2, flush=True)
        self.assertNotEqual(len(df), 0)
        df = self.ad.get_cache_file_oxy_count_df(max_profiles=2)
        self.assertNotEqual(len(df), 0)

    def test_get_update_datetime(self):
        # Any random delayed mode file will do...
        dt = self.ad._get_update_datetime(
                'http://tds0.ifremer.fr/thredds/dodsC/CORIOLIS-ARGO-GDAC-OBS/aoml/5901336/profiles/D5901336_082.nc')

    def test_to_odv(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2)
        converters.to_odv(df, 'biofloat_data.txt')

    def test_update_cache_false(self):
        df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2,
                                         update_cache=False)
        self.assertNotEqual(len(df), 0)

    def test_remove_df(self):
        self.ad._remove_df(self.ad._GLOBAL_META)