def process(self):
    """Load float data into the cache file and report the oxygen-float count.

    Reads its settings from ``self.args``: ``cache_file``, ``cache_dir``,
    ``verbose``, ``bio_list``, ``variables``, ``age``, ``wmo``, ``profiles``
    and ``pressure``. The floats to load come from
    ``ArgoData.get_oxy_floats_from_status(age_gte=args.age)`` when
    ``args.age`` is set, otherwise from ``args.wmo``.

    Raises:
        ValueError: if neither ``args.age`` nor ``args.wmo`` is set, since
            there is then no list of floats to load.
    """
    args = self.args

    # Fail fast, before the cache file is touched: without one of these
    # options there is no float list and the load below cannot run.
    if not args.age and not args.wmo:
        raise ValueError(
            'Either age or wmo must be specified to select floats to load')

    # Fall back to the generated short name and to the home directory.
    # os.path.join() discards cache_dir when cache_file is already absolute.
    cache_file = args.cache_file or self.short_cache_file()
    cache_dir = args.cache_dir or expanduser('~')
    cache_file = abspath(join(cache_dir, cache_file))

    print('Loading cache file {}'.format(cache_file))
    ad = ArgoData(verbosity=args.verbose, cache_file=cache_file,
                  bio_list=args.bio_list, variables=args.variables)

    if args.age:
        wmo_list = ad.get_oxy_floats_from_status(age_gte=args.age)
    else:
        wmo_list = args.wmo

    # The returned DataFrame is intentionally discarded here; presumably the
    # call is made for its effect on the cache file -- TODO confirm.
    ad.get_float_dataframe(wmo_list, max_profiles=args.profiles,
                           max_pressure=args.pressure, append_df=False)

    # After loading add lookup information to the cache file
    df = ad.get_cache_file_oxy_count_df(max_profiles=args.profiles, flush=True)
    print('{} floats appear to have valid oxygen data'.format(len(df)))
    print('Finished loading cache file {}'.format(cache_file))
def test_fixed_cache_file(self):
    # Exercise get_float_dataframe() against a cache file whose name is the
    # one ArgoDataLoader.short_cache_file() produces for age=3000, profiles=1.
    age = 3000      # Returns 1 float on 2 November 2015

    # Simulate what's done by load_biofloat_cache.py
    from scripts.load_biofloat_cache import ArgoDataLoader
    from argparse import Namespace
    loader = ArgoDataLoader()
    loader.args = Namespace(age=age, profiles=1)

    # The cache file lives in the 'biofloat' directory one level above this
    # test module's directory.
    base_dir = os.path.join(os.path.dirname(__file__), "../")
    short_name = loader.short_cache_file()
    cache_path = os.path.abspath(os.path.join(base_dir, 'biofloat', short_name))

    argo = ArgoData(verbosity=1, cache_file=cache_path)
    floats = argo.get_oxy_floats_from_status(age_gte=age)

    # Force limiting to what's in cache_file name: 1
    argo.get_float_dataframe(floats, max_profiles=2)

    # Force using maximum value
    argo.get_float_dataframe(floats)
def process(self):
    """Obtain the WOA lookup DataFrame for each float and log its mean gain.

    Settings are read from ``self.args``: ``verbose``, ``cache_file``,
    ``wmo``, ``results_file``, ``profiles`` and ``pressure``. The floats
    processed are ``args.wmo`` when given, otherwise the ``'wmo'`` column of
    ``ArgoData.get_cache_file_oxy_count_df()``.

    For each float the result stored under ``/WOA_WMO_<wmo>`` in
    ``args.results_file`` is reused when present. Otherwise it is computed
    with ``self.woa_lookup()`` and, if it has any rows without missing
    values, written to that file.
    """
    args = self.args
    self.logger.setLevel(self._log_levels[args.verbose])

    ad = ArgoData(verbosity=args.verbose, cache_file=args.cache_file)

    if args.wmo:
        wmo_list = args.wmo
    else:
        wmo_list = ad.get_cache_file_oxy_count_df()['wmo'].tolist()

    self.logger.info('Reading float profile data from %s', args.cache_file)
    total = len(wmo_list)
    for i, wmo in enumerate(wmo_list):
        key = '/WOA_WMO_{}'.format(wmo)
        self.logger.info('WMO_%s: Float %s of %s', wmo, i + 1, total)
        try:
            with pd.HDFStore(args.results_file) as s:
                # The stored result is read from the results file, so that
                # is the file named in the message.
                self.logger.debug('Reading %s from %s', key, args.results_file)
                wmo_gdf = s.get(key)
                self.logger.debug('Done.')
        except KeyError:
            # No stored result for this float yet: compute it from the
            # float's profile data.
            df = ad.get_float_dataframe([wmo], max_profiles=args.profiles,
                                        max_pressure=args.pressure,
                                        update_cache=False)
            wmo_gdf = self.woa_lookup(df)
            if not wmo_gdf.dropna().empty:
                # Save intermediate results to HDF file so that the script can
                # pick up where it left off following network or other problems
                with pd.HDFStore(args.results_file) as s:
                    s.put(key, wmo_gdf)
            else:
                # Logger.warn() is deprecated and removed in Python 3.13.
                self.logger.warning('Empty DataFrame for wmo %s', wmo)

        if not wmo_gdf.dropna().empty:
            self.logger.debug('wmo_gdf head: %s', wmo_gdf.head())
            self.logger.info('Gain for %s = %s', wmo,
                             wmo_gdf.groupby('wmo').gain.mean().values[0])
def test_cache_file(self):
    # Construct ArgoData with an explicit cache file path, then set its
    # verbosity to 1; the test passes if neither call raises.
    cache_path = '/tmp/biofloat_cache_file.hdf'
    argo = ArgoData(cache_file=cache_path)
    argo.set_verbosity(1)
def setUp(self):
    """Create the shared ArgoData instance and float lists, then build the cache."""
    # WMO identifiers kept as strings; judging by the attribute names these
    # are floats expected to pass / fail the OGA checks -- TODO confirm.
    self.good_oga_floats = ['1900650']
    self.bad_oga_floats = ['6901464']

    self.ad = ArgoData(verbosity=1)

    # Runs last so that every attribute above is already in place.
    self._build_default_cache()