def test_save_dataset(self):
    """
    Exercise ``save_dataset``: a normal save must create the output file,
    omitting the file argument must raise ``TypeError``, and passing
    something that is not a dataset must raise ``NotImplementedError``
    without creating the output file.
    """
    out_file = 'remove_me.nc'

    def remove_output():
        # Best-effort cleanup so one failing run cannot poison the next one.
        if os.path.isfile(out_file):
            os.remove(out_file)

    # Test normal functionality
    dataset = open_dataset(ds_id='AEROSOL_AATSR_SU_L3_V4.21_MONTHLY',
                           time_range='2008-01-01, 2008-03-01')
    try:
        save_dataset(dataset, out_file)
        self.assertTrue(os.path.isfile(out_file))
    finally:
        # Remove the file even when the save or the assertion fails;
        # otherwise the "file must not exist" check below would see a
        # stale file.
        remove_output()

    # Test required arguments
    with self.assertRaises(TypeError):
        save_dataset(dataset)

    # Test behavior when passing unexpected type.
    # A separate name is used so the real dataset is not shadowed.
    not_a_dataset = ('a', 1, 3, 5)
    try:
        with self.assertRaises(NotImplementedError):
            save_dataset(not_a_dataset, out_file)

        self.assertFalse(os.path.isfile(out_file))
    finally:
        remove_output()
def test_save_dataset(self):
    """
    Exercise ``save_dataset``: a normal save must create the output file,
    omitting the file argument must raise ``TypeError``, and passing
    something that is not a dataset must raise ``NotImplementedError``
    without creating the output file.
    """
    out_file = 'remove_me.nc'

    def remove_output():
        # Best-effort cleanup so one failing run cannot poison the next one.
        if os.path.isfile(out_file):
            os.remove(out_file)

    # Test normal functionality
    dataset = open_dataset(ds_id='AEROSOL_AATSR_SU_L3_V4.21_MONTHLY',
                           time_range='2008-01-01, 2008-03-01')
    try:
        save_dataset(dataset, out_file)
        self.assertTrue(os.path.isfile(out_file))
    finally:
        # Remove the file even when the save or the assertion fails;
        # otherwise the "file must not exist" check below would see a
        # stale file.
        remove_output()

    # Test required arguments
    with self.assertRaises(TypeError):
        save_dataset(dataset)

    # Test behavior when passing unexpected type.
    # A separate name is used so the real dataset is not shadowed.
    not_a_dataset = ('a', 1, 3, 5)
    try:
        with self.assertRaises(NotImplementedError):
            # noinspection PyTypeChecker
            save_dataset(not_a_dataset, out_file)

        self.assertFalse(os.path.isfile(out_file))
    finally:
        remove_output()
def long_term_average(source: str,
                      year_min: int,
                      year_max: int,
                      file: str,
                      var: VarNamesLike.TYPE = None,
                      save: bool = False,
                      monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform the long term monthly average of the given monthly data source
    for the given range of years.

    Only data sources whose ``time_frequency`` meta information is ``'mon'``
    are accepted; any other frequency is rejected.

    Depending on the given year range, data size, as well as internet
    connection quality, this operation can potentially take a very long time
    to finish.

    Careful consideration is needed in choosing the var parameter to create
    meaningful outputs. This is unique for each data source.

    :param source: The data source from which to extract the monthly average
    :param year_min: The earliest year of the desired time range
    :param year_max: The most recent year of the desired time range
    :param file: filepath where to save the long term average dataset
    :param var: If given, only these variable names will be preserved in the
           output.
    :param save: If True, saves the data downloaded during this operation.
           This can potentially be a very large amount of data.
    :param monitor: A progress monitor to use
    :return: The Long Term Average dataset.
    :raises ValueError: If ``year_min`` is greater than ``year_max``, if the
            query for ``source`` does not yield exactly one data source, or
            if the data source is not a monthly one.
    :raises TypeError: If the selected variables can not be divided.
    """
    var = VarNamesLike.convert(var)

    # Reject an empty/inverted range up front: it would otherwise lead to a
    # division by zero or to a loop that never reaches its end year.
    if year_min > year_max:
        raise ValueError("year_min ({}) must not be greater than "
                         "year_max ({})".format(year_min, year_max))

    n_years = year_max - year_min + 1
    total_work = 100

    # Select the appropriate data source
    data_store_list = DATA_STORE_REGISTRY.get_data_stores()
    data_sources = query_data_sources(data_store_list, name=source)
    if len(data_sources) == 0:
        raise ValueError("No data_source found for the given query "
                         "term {}".format(source))
    elif len(data_sources) > 1:
        raise ValueError("{} data_sources found for the given query "
                         "term {}".format(data_sources, source))

    data_source = data_sources[0]
    source_info = data_source.cache_info

    # Check if we have a monthly data source
    fq = data_source.meta_info['time_frequency']
    if fq != 'mon':
        raise ValueError("Only monthly datasets are supported for time being.")

    # ``None`` marks "nothing accumulated yet". A numeric 0 sentinel tested
    # with ``res == 0`` is not reliable once ``res`` holds a dataset, because
    # that comparison is element-wise.
    res = None

    with monitor.starting('LTA', total_work=total_work):
        # Set up the monitor
        monitor.progress(work=0)
        step = total_work * 0.9 / n_years

        # Process the data source year by year
        for year in range(year_min, year_max + 1):
            tmin = "{}-01-01".format(year)
            tmax = "{}-12-31".format(year)
            dt_range = to_datetime_range(tmin, tmax)

            # Determine if the data for the given year are already downloaded.
            # If at least one file of the given time range is present, we
            # don't delete the data for this year; we do the syncing anyway.
            was_already_downloaded = any(
                dt_range[0] <= date <= dt_range[1] for date in source_info
            )

            # If the sync did not advance the monitor through its child,
            # account for this part of the step ourselves.
            worked = monitor._worked
            data_source.sync(dt_range, monitor=monitor.child(work=step * 0.9))
            if worked == monitor._worked:
                monitor.progress(work=step * 0.9)

            ds = data_source.open_dataset(dt_range)
            try:
                # Filter the dataset
                ds = select_var(ds, var)

                try:
                    if res is None:
                        res = ds / n_years
                    else:
                        # Xarray doesn't do automatic alignment for in place
                        # operations, hence we have to do it manually
                        res = res + ds.reindex_like(res) / n_years
                except TypeError as error:
                    raise TypeError('One or more data arrays feature a dtype '
                                    'that can not be divided. Consider using '
                                    'the var parameter to filter the '
                                    'dataset.') from error
            finally:
                # Release the yearly dataset even if filtering or the
                # accumulation failed.
                ds.close()

            # delete data for the current year, if it should be deleted and it
            # was not already downloaded.
            if (not save) and (not was_already_downloaded):
                data_source.delete_local(dt_range)

            monitor.progress(work=step * 0.1)

        monitor.progress(msg='Saving the LTA dataset')
        save_dataset(res, file)
        monitor.progress(total_work * 0.1)

    return res