def process(self):
    """Log the mean gain for each float, reusing results saved in the HDF results file.

    The floats processed are ``self.args.wmo`` when given, otherwise every
    ``wmo`` listed by ``ArgoData.get_cache_file_oxy_count_df()``.  For each
    float the ``/WOA_WMO_<wmo>`` frame is read from ``self.args.results_file``;
    when that key is missing (``KeyError``) the float data is fetched with
    ``get_float_dataframe``, passed through ``self.woa_lookup`` and the result
    is written back to the results file under the same key.
    """
    self.logger.setLevel(self._log_levels[self.args.verbose])
    ad = ArgoData(verbosity=self.args.verbose, cache_file=self.args.cache_file)

    if self.args.wmo:
        wmo_list = self.args.wmo
    else:
        wmo_list = ad.get_cache_file_oxy_count_df()["wmo"].tolist()

    self.logger.info("Reading float profile data from %s", self.args.cache_file)
    total = len(wmo_list)
    for i, wmo in enumerate(wmo_list):
        self.logger.info("WMO_%s: Float %s of %s", wmo, i + 1, total)
        # One key per float; built once and shared by the read and write paths
        key = "/WOA_WMO_{}".format(wmo)
        try:
            with pd.HDFStore(self.args.results_file) as s:
                # The store opened here is the results file, so report that
                # file name (the message used to name the cache file)
                self.logger.debug("Reading %s from %s", key, self.args.results_file)
                wmo_gdf = s.get(key)
                self.logger.debug("Done.")
        except KeyError:
            df = ad.get_float_dataframe(
                [wmo],
                max_profiles=self.args.profiles,
                max_pressure=self.args.pressure,
                update_cache=False,
            )
            wmo_gdf = self.woa_lookup(df)
            # Save intermediate results to HDF file so that the script can
            # pick up where it left off following network or other problems
            with pd.HDFStore(self.args.results_file) as s:
                s.put(key, wmo_gdf)

        # Only report a gain when at least one row is free of missing values
        if not wmo_gdf.dropna().empty:
            self.logger.debug("wmo_gdf head: %s", wmo_gdf.head())
            self.logger.info("Gain for %s = %s", wmo, wmo_gdf.groupby("wmo").gain.mean().values[0])
def process(self):
    """Load float profile data into the cache file and report the oxygen float count.

    The cache file name is ``self.args.cache_file`` when set, otherwise
    ``self.short_cache_file()``; it is placed in ``self.args.cache_dir`` when
    set, otherwise in the user's home directory.  Floats are selected with
    ``get_oxy_floats_from_status(age_gte=self.args.age)`` when ``args.age`` is
    set, otherwise taken from ``self.args.wmo``.

    Raises:
        ValueError: if neither ``args.age`` nor ``args.wmo`` is set.  This
            case used to fail later with ``UnboundLocalError`` on
            ``wmo_list``, after the ArgoData object had been constructed.
    """
    # Validate the float selection before doing any work.  ``args.wmo`` is
    # only consulted when ``args.age`` is falsy, as in the branches below.
    if not self.args.age and not self.args.wmo:
        raise ValueError('No floats selected: set either args.age or args.wmo')

    cache_file = self.args.cache_file or self.short_cache_file()
    cache_dir = self.args.cache_dir or expanduser('~')
    cache_file = abspath(join(cache_dir, cache_file))

    print(('Loading cache file {}').format(cache_file))
    ad = ArgoData(verbosity=self.args.verbose, cache_file=cache_file,
                  bio_list=self.args.bio_list, variables=self.args.variables)

    if self.args.age:
        wmo_list = ad.get_oxy_floats_from_status(age_gte=self.args.age)
    else:
        wmo_list = self.args.wmo

    ad.get_float_dataframe(wmo_list, max_profiles=self.args.profiles,
                           max_pressure=self.args.pressure, append_df=False)

    # After loading add lookup information to the cache file
    df = ad.get_cache_file_oxy_count_df(max_profiles=self.args.profiles, flush=True)
    print(('{} floats appear to have valid oxygen data').format(len(df)))
    print(('Finished loading cache file {}').format(cache_file))
def process(self):
    """Log the mean gain for each float, reusing results saved in the HDF results file.

    The floats processed are ``self.args.wmo`` when given, otherwise every
    ``wmo`` listed by ``ArgoData.get_cache_file_oxy_count_df()``.  For each
    float the ``/WOA_WMO_<wmo>`` frame is read from ``self.args.results_file``;
    when that key is missing (``KeyError``) the float data is fetched with
    ``get_float_dataframe`` and passed through ``self.woa_lookup``.  A result
    with no complete rows is not saved and is reported with a warning.
    """
    self.logger.setLevel(self._log_levels[self.args.verbose])
    ad = ArgoData(verbosity=self.args.verbose, cache_file=self.args.cache_file)

    if self.args.wmo:
        wmo_list = self.args.wmo
    else:
        wmo_list = ad.get_cache_file_oxy_count_df()['wmo'].tolist()

    self.logger.info('Reading float profile data from %s', self.args.cache_file)
    total = len(wmo_list)
    for i, wmo in enumerate(wmo_list):
        self.logger.info('WMO_%s: Float %s of %s', wmo, i + 1, total)
        # One key per float; built once and shared by the read and write paths
        key = ('/WOA_WMO_{}').format(wmo)
        try:
            with pd.HDFStore(self.args.results_file) as s:
                # The store opened here is the results file, so report that
                # file name (the message used to name the cache file)
                self.logger.debug('Reading %s from %s', key, self.args.results_file)
                wmo_gdf = s.get(key)
                self.logger.debug('Done.')
        except KeyError:
            df = ad.get_float_dataframe([wmo], max_profiles=self.args.profiles,
                                        max_pressure=self.args.pressure,
                                        update_cache=False)
            wmo_gdf = self.woa_lookup(df)
            if wmo_gdf.dropna().empty:
                # Nothing worth saving or reporting for this float.
                # Logger.warn is a deprecated alias, so use Logger.warning.
                self.logger.warning('Empty DataFrame for wmo %s', wmo)
                continue
            # Save intermediate results to HDF file so that the script can
            # pick up where it left off following network or other problems
            with pd.HDFStore(self.args.results_file) as s:
                s.put(key, wmo_gdf)

        # Only report a gain when at least one row is free of missing values
        if not wmo_gdf.dropna().empty:
            self.logger.debug('wmo_gdf head: %s', wmo_gdf.head())
            self.logger.info('Gain for %s = %s', wmo, wmo_gdf.groupby('wmo').gain.mean().values[0])
def test_fixed_cache_file(self):
    # Runs ArgoData against the cache file name that ArgoDataLoader derives
    # from its arguments, first with an explicit max_profiles and then
    # with the default.
    base_dir = os.path.join(os.path.dirname(__file__), "../")

    # Simulated what's done by load_biofloat_cache.py
    from scripts.load_biofloat_cache import ArgoDataLoader
    from argparse import Namespace

    # Returns 1 float on 2 November 2015
    age = 3000

    loader = ArgoDataLoader()
    loader.args = Namespace(age=3000, profiles=1)
    relative_path = os.path.join(base_dir, 'biofloat', loader.short_cache_file())
    argo = ArgoData(verbosity=1, cache_file=os.path.abspath(relative_path))
    floats = argo.get_oxy_floats_from_status(age_gte=age)

    # Force limiting to what's in cache_file name: 1
    argo.get_float_dataframe(floats, max_profiles=2)

    # Force using maximum value
    argo.get_float_dataframe(floats)
def test_cache_file(self):
    # Construct ArgoData with an explicit cache file, then set its verbosity
    cache_path = '/tmp/biofloat_cache_file.hdf'
    argo = ArgoData(cache_file=cache_path)
    argo.set_verbosity(1)
def setUp(self):
    # WMO identifiers used by the tests; presumably "good" floats have data
    # and "bad" ones do not -- confirm against the tests that use them
    self.good_oga_floats = ['1900650']
    self.bad_oga_floats = ['6901464']

    self.ad = ArgoData(verbosity=1)

    # Runs the private URL/profile helpers once before every test
    self._build_default_cache()
class DataTest(unittest.TestCase): def setUp(self): self.ad = ArgoData(verbosity=1) self.good_oga_floats = ['1900650'] self.bad_oga_floats = ['6901464'] self._build_default_cache() def test_get_biofloats(self): self.oga_floats = self.ad.get_oxy_floats_from_status() self.assertNotEqual(len(self.oga_floats), 0) def _get_dac_urls(self): # Testing with a float that has data for dac_url in self.ad.get_dac_urls(self.good_oga_floats).values(): self.dac_url = dac_url self.assertTrue(self.dac_url.startswith('http')) break def _get_profile_opendap_urls(self): for profile_url in self.ad.get_profile_opendap_urls(self.dac_url): self.profile_url = profile_url break def _profile_to_dataframe(self): key, code = self.ad._float_profile_key(self.profile_url) d = self.ad._profile_to_dataframe(self.good_oga_floats[0], self.profile_url, key, 11000) self.assertNotEqual(len(d), 0) def _build_default_cache(self): # Methods need to be called in order self._get_dac_urls() self._get_profile_opendap_urls() self._profile_to_dataframe() def test_get_float_dataframe(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) self.assertNotEqual(len(df), 0) df = self.ad.get_float_dataframe(self.bad_oga_floats, max_profiles=2) self.assertEqual(len(df.dropna()), 0) def test_cache_file(self): ad = ArgoData(cache_file='/tmp/biofloat_cache_file.hdf') ad.set_verbosity(1) def test_fixed_cache_file(self): age = 3000 # Returns 1 float on 2 November 2015 parent_dir = os.path.join(os.path.dirname(__file__), "../") # Simulated what's done by load_biofloat_cache.py from scripts.load_biofloat_cache import ArgoDataLoader from argparse import Namespace adl = ArgoDataLoader() adl.args = Namespace(age=3000, profiles=1) cache_file = os.path.abspath( os.path.join(parent_dir, 'biofloat', adl.short_cache_file())) ad = ArgoData(verbosity=1, cache_file=cache_file) wmo_list = ad.get_oxy_floats_from_status(age_gte=age) # Force limiting to what's in cache_file name: 1 ad.get_float_dataframe(wmo_list, 
max_profiles=2) # Force using maximum value ad.get_float_dataframe(wmo_list) def test_util_o2sat(self): # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html self.assertAlmostEqual(utils.o2sat(35, 5), 308, places=0) self.assertAlmostEqual(utils.o2sat(35, 20), 225, places=0) self.assertAlmostEqual(utils.o2sat(35, 30), 190, places=0) def test_util_convert_to_mll(self): # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html # and http://www.oceanographers.net/forums/showthread.php?1486-ask-how-to-conversion-ml-L-to-%B5mol-kg self.assertAlmostEqual(utils.convert_to_mll(308, 35, 5, 0), 7.1, places=1) self.assertAlmostEqual(utils.convert_to_mll(225.6, 36.5, 1, 0), 5.2, places=1) def test_get_bio_profile_index(self): df = self.ad.get_bio_profile_index() self.assertNotEqual(len(df), 0) def test_get_cache_file_all_wmo_list(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_all_wmo_list(flush=True) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_all_wmo_list() self.assertNotEqual(len(df), 0) def test_get_cache_file_oxy_wmo_list(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_oxy_count_df(max_profiles=2, flush=True) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_oxy_count_df(max_profiles=2) self.assertNotEqual(len(df), 0) def test_get_update_datetime(self): # Any random delayed mode file will do... 
dt = self.ad._get_update_datetime( 'http://tds0.ifremer.fr/thredds/dodsC/CORIOLIS-ARGO-GDAC-OBS/aoml/5901336/profiles/D5901336_082.nc' ) def test_to_odv(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) converters.to_odv(df, 'biofloat_data.txt') def test_update_cache_false(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2, update_cache=False) self.assertNotEqual(len(df), 0) def test_remove_df(self): self.ad._remove_df(self.ad._GLOBAL_META)
class DataTest(unittest.TestCase): def setUp(self): self.ad = ArgoData(verbosity=1) self.good_oga_floats = ['1900650'] self.bad_oga_floats = ['6901464'] self._build_default_cache() def test_get_biofloats(self): self.oga_floats = self.ad.get_oxy_floats_from_status() self.assertNotEqual(len(self.oga_floats), 0) def _get_dac_urls(self): # Testing with a float that has data for dac_url in self.ad.get_dac_urls(self.good_oga_floats).values(): self.dac_url = dac_url self.assertTrue(self.dac_url.startswith('http')) break def _get_profile_opendap_urls(self): for profile_url in self.ad.get_profile_opendap_urls(self.dac_url): self.profile_url = profile_url break def _profile_to_dataframe(self): key, code = self.ad._float_profile_key(self.profile_url) d = self.ad._profile_to_dataframe(self.good_oga_floats[0], self.profile_url, key, 11000) self.assertNotEqual(len(d), 0) def _build_default_cache(self): # Methods need to be called in order self._get_dac_urls() self._get_profile_opendap_urls() self._profile_to_dataframe() def test_get_float_dataframe(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) self.assertNotEqual(len(df), 0) df = self.ad.get_float_dataframe(self.bad_oga_floats, max_profiles=2) self.assertEqual(len(df.dropna()), 0) def test_cache_file(self): ad = ArgoData(cache_file='/tmp/biofloat_cache_file.hdf') ad.set_verbosity(1) def test_fixed_cache_file(self): age = 3000 # Returns 1 float on 2 November 2015 parent_dir = os.path.join(os.path.dirname(__file__), "../") # Simulated what's done by load_biofloat_cache.py from scripts.load_biofloat_cache import ArgoDataLoader from argparse import Namespace adl = ArgoDataLoader() adl.args = Namespace(age=3000, profiles=1) cache_file = os.path.abspath( os.path.join(parent_dir, 'biofloat', adl.short_cache_file())) ad = ArgoData(verbosity=1, cache_file=cache_file) wmo_list = ad.get_oxy_floats_from_status(age_gte=age) # Force limiting to what's in cache_file name: 1 ad.get_float_dataframe(wmo_list, 
max_profiles=2) # Force using maximum value ad.get_float_dataframe(wmo_list) def test_util_o2sat(self): # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html self.assertAlmostEqual(utils.o2sat(35, 5), 308, places=0) self.assertAlmostEqual(utils.o2sat(35, 20), 225, places=0) self.assertAlmostEqual(utils.o2sat(35, 30), 190, places=0) def test_util_convert_to_mll(self): # See http://www.engineeringtoolbox.com/oxygen-solubility-water-d_841.html # and http://www.oceanographers.net/forums/showthread.php?1486-ask-how-to-conversion-ml-L-to-%B5mol-kg self.assertAlmostEqual(utils.convert_to_mll(308, 35, 5, 0), 7.1, places=1) self.assertAlmostEqual(utils.convert_to_mll(225.6, 36.5, 1, 0), 5.2, places=1) def test_get_bio_profile_index(self): df = self.ad.get_bio_profile_index() self.assertNotEqual(len(df), 0) def test_get_cache_file_all_wmo_list(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_all_wmo_list(flush=True) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_all_wmo_list() self.assertNotEqual(len(df), 0) def test_get_cache_file_oxy_wmo_list(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_oxy_count_df(max_profiles=2, flush=True) self.assertNotEqual(len(df), 0) df = self.ad.get_cache_file_oxy_count_df(max_profiles=2) self.assertNotEqual(len(df), 0) def test_get_update_datetime(self): # Any random delayed mode file will do... 
dt = self.ad._get_update_datetime( 'http://tds0.ifremer.fr/thredds/dodsC/CORIOLIS-ARGO-GDAC-OBS/aoml/5901336/profiles/D5901336_082.nc') def test_to_odv(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2) converters.to_odv(df, 'biofloat_data.txt') def test_update_cache_false(self): df = self.ad.get_float_dataframe(self.good_oga_floats, max_profiles=2, update_cache=False) self.assertNotEqual(len(df), 0) def test_remove_df(self): self.ad._remove_df(self.ad._GLOBAL_META)