def load(self, dataset_keys, previous_datasets=None):
    """Load `dataset_keys`.

    If `previous_datasets` is provided, do not reload those.
    """
    all_datasets = previous_datasets or DatasetDict()
    datasets = DatasetDict()

    # Include coordinates in the list of datasets to load
    dsids = [self.get_dataset_key(ds_key) for ds_key in dataset_keys]
    coordinates = self._get_coordinates_for_dataset_keys(dsids)
    all_dsids = list(set().union(*coordinates.values())) + dsids

    for dsid in all_dsids:
        if dsid in all_datasets:
            continue
        coords = [all_datasets.get(cid, None)
                  for cid in coordinates.get(dsid, [])]
        ds = self._load_dataset_with_area(dsid, coords)
        if ds is not None:
            all_datasets[dsid] = ds
            if dsid in dsids:
                datasets[dsid] = ds
    self._load_ancillary_variables(all_datasets)

    return datasets

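def _example_incremental_load(reader):
    # Hypothetical usage sketch (not from the source) of the
    # `previous_datasets` behavior above. `reader` is assumed to be an
    # already-configured FileYAMLReader; 'C01'..'C03' are placeholder
    # dataset names.
    first = reader.load(['C01', 'C02'])
    # 'C02' is already in `first`, so it is skipped on the second call; note
    # that the returned DatasetDict then contains only the newly read 'C03'.
    return reader.load(['C02', 'C03'], previous_datasets=first)
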
def _load_config(self, composite_configs, **kwargs):
    """Load and merge one or more composite configuration files."""
    if not isinstance(composite_configs, (list, tuple)):
        composite_configs = [composite_configs]

    conf = {}
    for composite_config in composite_configs:
        with open(composite_config) as conf_file:
            conf = recursive_dict_update(conf, yaml.load(conf_file))
    try:
        sensor_name = conf['sensor_name']
    except KeyError:
        LOG.debug('No "sensor_name" tag found in %s, skipping.',
                  composite_config)
        return

    sensor_id = sensor_name.split('/')[-1]
    sensor_deps = sensor_name.split('/')[:-1]

    compositors = self.compositors.setdefault(sensor_id, DatasetDict())
    modifiers = self.modifiers.setdefault(sensor_id, {})

    for sensor_dep in reversed(sensor_deps):
        if sensor_dep not in self.compositors or sensor_dep not in self.modifiers:
            self.load_sensor_composites(sensor_dep)

    if sensor_deps:
        compositors.update(self.compositors[sensor_deps[-1]])
        modifiers.update(self.modifiers[sensor_deps[-1]])

    for composite_type in ['modifiers', 'composites']:
        if composite_type not in conf:
            continue
        for composite_name in conf[composite_type]:
            self._process_composite_config(composite_name, conf,
                                           composite_type, sensor_id,
                                           composite_config, **kwargs)

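# For context, a hypothetical sketch (not from the source) of the parsed
# structure _load_config() consumes after yaml.load(). A 'sensor_name' such
# as 'visir/seviri' means composites and modifiers are first inherited from
# the 'visir' parent before 'seviri' entries are processed. The composite
# name and contents below are illustrative only.
_EXAMPLE_COMPOSITE_CONF = {
    'sensor_name': 'visir/seviri',
    'composites': {
        'overview': {
            'compositor': 'satpy.composites.GenericCompositor',
            'prerequisites': [0.6, 0.8, 10.8],
        },
    },
}
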
def _read_datasets(self, dataset_nodes, **kwargs):
    """Read the given datasets from file."""
    # Sort requested datasets by reader
    reader_datasets = {}
    for node in dataset_nodes:
        ds_id = node.name
        # if we already have this node loaded or the node was assigned
        # by the user (node data is None) then don't try to load from a
        # reader
        if ds_id in self.datasets or not isinstance(node.data, dict):
            continue
        reader_name = node.data.get('reader_name')
        if reader_name is None:
            # This shouldn't be possible
            raise RuntimeError("Dependency tree has a corrupt node.")
        reader_datasets.setdefault(reader_name, set()).add(ds_id)

    # load all datasets for one reader at a time
    loaded_datasets = DatasetDict()
    for reader_name, ds_ids in reader_datasets.items():
        reader_instance = self.readers[reader_name]
        new_datasets = reader_instance.load(ds_ids, **kwargs)
        loaded_datasets.update(new_datasets)
    self.datasets.update(loaded_datasets)
    return loaded_datasets

def __init__(self, filenames=None, ppp_config_dir=get_environ_config_dir(),
             reader=None, base_dir=None, **metadata):
    """The Scene object constructor.

    Args:
        filenames: A sequence of files that will be used to load data from.
        ppp_config_dir: The directory containing the configuration files
            for satpy.
        reader: The name of the reader to use for loading the data.
        base_dir: The directory to search for files containing the data to
            load.
        metadata: Free metadata information.
    """
    InfoObject.__init__(self, **metadata)
    # Set the PPP_CONFIG_DIR in the environment in case it's used elsewhere
    # in pytroll
    LOG.debug("Setting 'PPP_CONFIG_DIR' to '%s'", ppp_config_dir)
    os.environ["PPP_CONFIG_DIR"] = self.ppp_config_dir = ppp_config_dir

    self.readers = self.create_reader_instances(filenames=filenames,
                                                base_dir=base_dir,
                                                reader=reader)
    self.info.update(self._compute_metadata_from_readers())

    self.datasets = DatasetDict()
    self.cpl = CompositorLoader(self.ppp_config_dir)
    self.compositors = {}
    self.wishlist = set()

def setUp(self):
    """Create a test DatasetDict."""
    from satpy.dataset import DatasetID
    from satpy.readers import DatasetDict
    self.regular_dict = regular_dict = {
        DatasetID(name="test", wavelength=(0, 0.5, 1),
                  resolution=1000): "1",
        DatasetID(name="testh", wavelength=(0, 0.5, 1),
                  resolution=500): "1h",
        DatasetID(name="test2", wavelength=(1, 1.5, 2),
                  resolution=1000): "2",
        DatasetID(name="test3", wavelength=(1.2, 1.7, 2.2),
                  resolution=1000): "3",
        DatasetID(name="test4", calibration="radiance",
                  polarization="V"): "4rad",
        DatasetID(name="test4", calibration="reflectance",
                  polarization="H"): "4refl",
        DatasetID(name="test5", modifiers=('mod1', 'mod2')): "5_2mod",
        DatasetID(name="test5", modifiers=('mod2',)): "5_1mod",
        DatasetID(name='test6', level=100): '6_100',
        DatasetID(name='test6', level=200): '6_200',
    }
    self.test_dict = DatasetDict(regular_dict)

def load_compositors(self, sensor_names):
    """Load all compositor configs for the provided sensors.

    Args:
        sensor_names (list of strings): Sensor names that have matching
                                        ``sensor_name.yaml`` config files.

    Returns:
        (comps, mods): Where `comps` is a dictionary:

                sensor_name -> composite ID -> compositor object

            And `mods` is a dictionary:

                sensor_name -> modifier name -> (modifier class,
                modifiers options)

        Note that these dictionaries are copies of those cached in
        this object.
    """
    comps = {}
    mods = {}
    for sensor_name in sensor_names:
        if sensor_name not in self.compositors:
            self.load_sensor_composites(sensor_name)
        if sensor_name in self.compositors:
            comps[sensor_name] = DatasetDict(
                self.compositors[sensor_name].copy())
            mods[sensor_name] = self.modifiers[sensor_name].copy()
    return comps, mods

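def _example_load_compositors(cpl):
    # Hypothetical usage sketch: `cpl` is assumed to be a CompositorLoader
    # instance and 'seviri' a sensor with a matching seviri.yaml config;
    # 'overview' is a placeholder composite name.
    comps, mods = cpl.load_compositors(['seviri'])
    # `comps['seviri']` is a DatasetDict, so composites can be looked up by
    # name just like datasets.
    return comps['seviri']['overview']
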
def test_init_dict(self):
    """Test DatasetDict init with a regular dict argument."""
    from satpy.dataset import DatasetID
    from satpy.readers import DatasetDict
    regular_dict = {
        DatasetID(name="test", wavelength=(0, 0.5, 1)): "1",
    }
    d = DatasetDict(regular_dict)
    self.assertEqual(d, regular_dict)

def test_init_dict(self):
    """Test DatasetDict init with a regular dict argument."""
    from satpy.readers import DatasetDict, DatasetID
    regular_dict = {
        DatasetID(name="test", wavelength=(0, 0.5, 1)): "1",
    }
    d = DatasetDict(regular_dict)
    self.assertEqual(d, regular_dict)

def load(self, dataset_keys):
    """Load *dataset_keys*, loading navigation/area information as needed."""
    loaded_navs = {}
    datasets = DatasetDict()
    for dataset_key in dataset_keys:
        dsid = self.get_dataset_key(dataset_key)
        ds_info = self.ids[dsid]

        # Get the file handler to load this dataset (list or single string)
        filetype = self._preferred_filetype(ds_info['file_type'])
        if filetype is None:
            raise RuntimeError(
                "Required file type '{}' not found or loaded".format(
                    ds_info['file_type']))
        file_handlers = self.file_handlers[filetype]

        all_shapes, proj = self._load_dataset(file_handlers, dsid, ds_info)
        datasets[dsid] = proj

        if isinstance(proj, Projectable) and ('area' not in proj.info or
                                              proj.info['area'] is None):
            # we need to load the area because the file handlers didn't
            navid = AreaID(ds_info.get('navigation'), dsid.resolution)
            if navid.name is None or navid.name not in self.config[
                    'navigations']:
                try:
                    nav_filetype = filetype
                    navid = dsid
                    nav_info = ds_info
                    nav_fhs = self.file_handlers[nav_filetype]

                    ds_area = self._load_area(navid, nav_fhs, nav_info,
                                              all_shapes, proj.shape)
                    loaded_navs[navid.name] = ds_area
                    proj.info["area"] = ds_area
                except NotImplementedError as err:
                    # we don't know how to load navigation
                    LOG.warning("Can't load navigation for {}: {}".format(
                        dsid, str(err)))
            elif navid.name in loaded_navs:
                proj.info["area"] = loaded_navs[navid.name]
            else:
                nav_info = self.config['navigations'][navid.name]
                nav_filetype = self._preferred_filetype(
                    nav_info['file_type'])
                if nav_filetype is None:
                    raise RuntimeError(
                        "Required file type '{}' not found or loaded".format(
                            nav_info['file_type']))
                nav_fhs = self.file_handlers[nav_filetype]

                ds_area = self._load_area(navid, nav_fhs, nav_info,
                                          all_shapes, proj.shape)
                loaded_navs[navid.name] = ds_area
                proj.info["area"] = ds_area

    return datasets

def __init__(self, filenames=None, ppp_config_dir=None, reader=None,
             base_dir=None, **info):
    """The Scene object constructor."""
    # Get PPP_CONFIG_DIR
    self.ppp_config_dir = ppp_config_dir or get_environ_config_dir()
    # Set the PPP_CONFIG_DIR in the environment in case it's used elsewhere
    # in pytroll
    LOG.debug("Setting 'PPP_CONFIG_DIR' to '%s'", self.ppp_config_dir)
    os.environ["PPP_CONFIG_DIR"] = self.ppp_config_dir

    InfoObject.__init__(self, **info)
    self.readers = {}
    self.datasets = DatasetDict()
    self.cpl = CompositorLoader(self.ppp_config_dir)
    self.compositors = {}
    self.wishlist = set()

    if filenames is not None and not filenames:
        raise ValueError("Filenames are specified but empty")

    finder = ReaderFinder(
        ppp_config_dir=self.ppp_config_dir,
        base_dir=base_dir,
        start_time=self.info.get('start_time'),
        end_time=self.info.get('end_time'),
        area=self.info.get('area'),
    )
    try:
        reader_instances = finder(reader=reader,
                                  sensor=self.info.get("sensor"),
                                  filenames=filenames)
    except ValueError as err:
        if filenames is None and base_dir is None:
            LOG.info('Neither filenames nor base_dir provided, '
                     'creating an empty scene (error was %s)', str(err))
            reader_instances = []
        else:
            raise

    # reader finder could return multiple readers
    sensors = []
    for reader_instance in reader_instances:
        if reader_instance:
            self.readers[reader_instance.name] = reader_instance
            sensors.extend(reader_instance.sensor_names)
    # if the user didn't tell us what sensors to work with, let's figure it
    # out
    if not self.info.get("sensor"):
        self.info["sensor"] = sensors

def load(self, dataset_keys): """Load *dataset_keys*.""" all_datasets = DatasetDict() datasets = DatasetDict() # Include coordinates in the list of datasets to load dsids = [self.get_dataset_key(ds_key) for ds_key in dataset_keys] coordinates = self._get_coordinates_for_dataset_keys(dsids) all_dsids = list(set().union(*coordinates.values())) + dsids for dsid in all_dsids: coords = [all_datasets.get(cid, None) for cid in coordinates.get(dsid, [])] ds = self._load_dataset_with_area(dsid, coords) if ds is not None: all_datasets[dsid] = ds if dsid in dsids: datasets[dsid] = ds return datasets
def test_get_item(self):
    """Test DatasetDict getitem with different types of keys."""
    from satpy.readers import DatasetDict, DatasetID
    regular_dict = {
        DatasetID(name="test", wavelength=(0, 0.5, 1),
                  resolution=1000): "1",
        DatasetID(name="testh", wavelength=(0, 0.5, 1),
                  resolution=500): "1h",
        DatasetID(name="test2", wavelength=(1, 1.5, 2),
                  resolution=1000): "2",
    }
    d = DatasetDict(regular_dict)

    self.assertEqual(d["test"], "1")
    self.assertEqual(d[1.5], "2")
    self.assertEqual(d[DatasetID(wavelength=1.5)], "2")
    self.assertEqual(d[DatasetID(wavelength=0.5, resolution=1000)], "1")
    self.assertEqual(d[DatasetID(wavelength=0.5, resolution=500)], "1h")

def test_get_keys_by_datasetid(self):
    """Test getting keys that match a partial DatasetID."""
    from satpy.readers import DatasetDict, DatasetID
    did_list = [
        DatasetID(name="test", wavelength=(0, 0.5, 1), resolution=1000),
        DatasetID(name="testh", wavelength=(0, 0.5, 1), resolution=500),
        DatasetID(name="test2", wavelength=(1, 1.5, 2), resolution=1000),
    ]
    val_list = ["1", "1h", "2"]
    d = DatasetDict(dict(zip(did_list, val_list)))
    self.assertIn(did_list[0],
                  d.get_keys_by_datasetid(DatasetID(wavelength=0.5)))
    self.assertIn(did_list[1],
                  d.get_keys_by_datasetid(DatasetID(wavelength=0.5)))
    self.assertIn(did_list[2],
                  d.get_keys_by_datasetid(DatasetID(wavelength=1.5)))
    self.assertIn(did_list[0],
                  d.get_keys_by_datasetid(DatasetID(resolution=1000)))
    self.assertIn(did_list[2],
                  d.get_keys_by_datasetid(DatasetID(resolution=1000)))

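def _example_datasetdict_lookup():
    # Self-contained recap of the lookup behavior exercised by the two tests
    # above: DatasetDict items can be retrieved by name, by wavelength, or by
    # a partial DatasetID.
    from satpy.readers import DatasetDict, DatasetID
    d = DatasetDict({
        DatasetID(name="test", wavelength=(0, 0.5, 1), resolution=1000): "1",
        DatasetID(name="test2", wavelength=(1, 1.5, 2), resolution=1000): "2",
    })
    assert d["test"] == "1"                      # lookup by name
    assert d[1.5] == "2"                         # lookup by wavelength
    assert d[DatasetID(wavelength=0.5)] == "1"   # lookup by partial DatasetID
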
def read_datasets(self, dataset_nodes, **kwargs):
    """Read the given datasets from file."""
    # Sort requested datasets by reader
    reader_datasets = {}
    for node in dataset_nodes:
        ds_id = node.name
        if ds_id in self.datasets and self.datasets[ds_id].is_loaded():
            continue
        reader_name = node.data['reader_name']
        reader_datasets.setdefault(reader_name, set()).add(ds_id)

    # load all datasets for one reader at a time
    loaded_datasets = DatasetDict()
    for reader_name, ds_ids in reader_datasets.items():
        reader_instance = self.readers[reader_name]
        new_datasets = reader_instance.load(ds_ids, **kwargs)
        loaded_datasets.update(new_datasets)
    self.datasets.update(loaded_datasets)
    return loaded_datasets

def test_init_noargs(self):
    """Test DatasetDict init with no arguments."""
    from satpy.readers import DatasetDict
    d = DatasetDict()

def test_init_noargs(self): """Test DatasetDict init with no arguments.""" from satpy.readers import DatasetDict d = DatasetDict() self.assertIsInstance(d, dict)
def __init__(self, filenames=None, reader=None, filter_parameters=None,
             reader_kwargs=None, ppp_config_dir=get_environ_config_dir(),
             base_dir=None, sensor=None, start_time=None, end_time=None,
             area=None):
    """Initialize Scene with Reader and Compositor objects.

    To load data `filenames` and preferably `reader` must be specified. If
    `filenames` is provided without `reader` then the available readers
    will be searched for a Reader that can support the provided files.
    This can take a considerable amount of time so it is recommended that
    `reader` always be provided. Note without `filenames` the Scene is
    created with no Readers available requiring Datasets to be added
    manually::

        scn = Scene()
        scn['my_dataset'] = Dataset(my_data_array, **my_info)

    Args:
        filenames (iterable or dict): A sequence of files that will be
            used to load data from. A ``dict`` object should map reader
            names to a list of filenames for that reader.
        reader (str or list): The name of the reader to use for loading
            the data or a list of names.
        filter_parameters (dict): Specify loaded file filtering
            parameters. Shortcut for
            `reader_kwargs['filter_parameters']`.
        reader_kwargs (dict): Keyword arguments to pass to specific reader
            instances.
        ppp_config_dir (str): The directory containing the configuration
            files for satpy.
        base_dir (str): (DEPRECATED) The directory to search for files
            containing the data to load. If *filenames* is also provided,
            this is ignored.
        sensor (list or str): (DEPRECATED: Use `find_files_and_readers`
            function) Limit used files by provided sensors.
        area (AreaDefinition): (DEPRECATED: Use `filter_parameters`) Limit
            used files by geographic area.
        start_time (datetime): (DEPRECATED: Use `filter_parameters`) Limit
            used files by starting time.
        end_time (datetime): (DEPRECATED: Use `filter_parameters`) Limit
            used files by ending time.
    """
    super(Scene, self).__init__()
    # Set the PPP_CONFIG_DIR in the environment in case it's used
    # elsewhere in pytroll
    LOG.debug("Setting 'PPP_CONFIG_DIR' to '%s'", ppp_config_dir)
    os.environ["PPP_CONFIG_DIR"] = self.ppp_config_dir = ppp_config_dir

    if not filenames and (start_time or end_time or base_dir):
        import warnings
        warnings.warn(
            "Deprecated: Use " +
            "'from satpy import find_files_and_readers' to find files")
        from satpy import find_files_and_readers
        filenames = find_files_and_readers(
            start_time=start_time,
            end_time=end_time,
            base_dir=base_dir,
            reader=reader,
            sensor=sensor,
            ppp_config_dir=self.ppp_config_dir,
            reader_kwargs=reader_kwargs,
        )
    elif start_time or end_time or area:
        import warnings
        warnings.warn(
            "Deprecated: Use " +
            "'filter_parameters' to filter loaded files by 'start_time', " +
            "'end_time', or 'area'.")
        fp = filter_parameters if filter_parameters else {}
        fp.update({
            'start_time': start_time,
            'end_time': end_time,
            'area': area,
        })
        filter_parameters = fp
    if filter_parameters:
        if reader_kwargs is None:
            reader_kwargs = {}
        reader_kwargs.setdefault('filter_parameters',
                                 {}).update(filter_parameters)

    if filenames and isinstance(filenames, str):
        raise ValueError(
            "'filenames' must be a list of files: Scene(filenames=[filename])"
        )

    self.readers = self.create_reader_instances(
        filenames=filenames, reader=reader, reader_kwargs=reader_kwargs)
    self.attrs.update(self._compute_metadata_from_readers())
    self.datasets = DatasetDict()
    self.cpl = CompositorLoader(self.ppp_config_dir)
    comps, mods = self.cpl.load_compositors(self.attrs['sensor'])
    self.wishlist = set()
    self.dep_tree = DependencyTree(self.readers, comps, mods)
    self.resamplers = {}

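def _example_scene_creation():
    # Hedged usage sketch for the constructor above; the reader name and
    # file path are placeholders, not real data.
    from datetime import datetime
    from satpy import Scene
    scn = Scene(reader='abi_l1b',
                filenames=['my_abi_file.nc'],
                filter_parameters={'start_time': datetime(2017, 9, 1)})
    # Per the docstring, `filenames` may also be a dict mapping reader names
    # to file lists: Scene(filenames={'abi_l1b': ['my_abi_file.nc']})
    return scn
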
def load(self, dataset_keys, area=None, start_time=None, end_time=None):
    """Load *dataset_keys* from HRIT files via mipp."""
    image_files = []
    pattern = self.file_patterns[0]
    prologue_file = None
    epilogue_file = None
    for filename in self.info['filenames']:
        try:
            file_info = parse(pattern, os.path.basename(filename))
        except ValueError:
            continue
        if file_info["segment"] == "EPI":
            epilogue_file = filename
        elif file_info["segment"] == "PRO":
            prologue_file = filename
        else:
            image_files.append(filename)

    start_times = set()
    datasets = DatasetDict()
    area_converted_to_extent = False
    area_extent = None
    for ds in dataset_keys:
        channel_files = []
        for filename in image_files:
            file_info = parse(pattern, os.path.basename(filename))
            if file_info["dataset_name"] == ds.name:
                channel_files.append(filename)
            start_times.add(file_info['start_time'])

        if not channel_files:
            continue

        kwargs = {}
        if 'platform_name' in self.info:
            kwargs['platform_name'] = self.info['platform_name']
        # Convert area definitions to maximal area_extent
        if not area_converted_to_extent and area is not None:
            metadata = xrit.sat.load_files(prologue_file,
                                           channel_files,
                                           epilogue_file,
                                           only_metadata=True,
                                           **kwargs)
            # otherwise use the default value (MSG3 extent at
            # lon0=0.0), that is, do not pass default_extent=area_extent
            area_extent = area_defs_to_extent(
                [area], metadata.proj4_params)
            area_converted_to_extent = True

        try:
            calibrate = 1
            if ds.calibration == 'counts':
                calibrate = 0
            elif ds.calibration == 'radiance':
                calibrate = 2
            image = xrit.sat.load_files(prologue_file,
                                        channel_files,
                                        epilogue_file,
                                        mask=True,
                                        calibrate=calibrate,
                                        **kwargs)
            if area_extent:
                metadata, data = image(area_extent)
            else:
                metadata, data = image()
        except CalibrationError:
            LOGGER.warning(
                "Loading non calibrated data since calibration failed.")
            image = xrit.sat.load_files(prologue_file,
                                        channel_files,
                                        epilogue_file,
                                        mask=True,
                                        calibrate=False,
                                        **kwargs)
            if area_extent:
                metadata, data = image(area_extent)
            else:
                metadata, data = image()
        except ReaderError as err:
            # if dataset can't be found, go on with next dataset
            LOGGER.error(str(err))
            continue

        if len(metadata.instruments) != 1:
            sensor = None
        else:
            sensor = metadata.instruments[0]

        units = {'ALBEDO(%)': '%',
                 'KELVIN': 'K'}
        standard_names = {'1': 'counts',
                          'W m-2 sr-1 m-1':
                          'toa_outgoing_radiance_per_unit_wavelength',
                          '%': 'toa_bidirectional_reflectance',
                          'K': 'toa_brightness_temperature'}

        unit = units.get(metadata.calibration_unit,
                         metadata.calibration_unit)
        projectable = Projectable(
            data,
            name=ds.name,
            units=unit,
            standard_name=standard_names[unit],
            sensor=sensor,
            start_time=min(start_times),
            id=ds)

        # Build an area on the fly from the mipp metadata
        proj_params = getattr(metadata, "proj4_params").split(" ")
        proj_dict = {}
        for param in proj_params:
            key, val = param.split("=")
            proj_dict[key] = val

        if IS_PYRESAMPLE_LOADED:
            # Build area_def on-the-fly
            projectable.info["area"] = geometry.AreaDefinition(
                str(metadata.area_extent) + str(data.shape),
                "On-the-fly area",
                proj_dict["proj"],
                proj_dict,
                data.shape[1],
                data.shape[0],
                metadata.area_extent)
        else:
            LOGGER.info("Could not build area, pyresample missing...")

        datasets[ds] = projectable

    return datasets

def __init__(self, filenames=None, ppp_config_dir=get_environ_config_dir(),
             reader=None, base_dir=None, sensor=None, start_time=None,
             end_time=None, area=None, reader_kwargs=None, **metadata):
    """The Scene object constructor.

    Note to load data either `filenames`, `reader`, or a 'base_dir' must
    be specified. If `filenames` is not specified then `reader` must be
    specified to search the current directory or `base_dir` if specified.
    If neither `filenames` nor `reader` is specified then `base_dir` will
    be used to find any files matching the file patterns for any
    configured readers. Otherwise the Scene is created with no Readers
    available meaning Datasets must be added manually:

        scn = Scene(sensor='viirs', start_time=start_time)
        scn['my_dataset'] = Dataset(my_data_array, **my_info)

    Args:
        filenames (iterable): A sequence of files that will be used to
            load data from.
        ppp_config_dir (str): The directory containing the configuration
            files for satpy.
        reader: The name of the reader to use for loading the data.
        base_dir (str): The directory to search for files containing the
            data to load. If *filenames* is also provided, this is
            ignored.
        sensor (list or str): Limit used files by provided sensors.
        area (AreaDefinition): Limit used files by geographic area.
        start_time (datetime): Limit used files by starting time.
        end_time (datetime): Limit used files by ending time.
        reader_kwargs (dict): Keyword arguments to pass to specific reader
            instances.
        metadata: Other metadata to assign to the Scene's ``.info``.
    """
    InfoObject.__init__(self, sensor=sensor or set(), area=area,
                        start_time=start_time, end_time=end_time,
                        **metadata)
    # Set the PPP_CONFIG_DIR in the environment in case it's used
    # elsewhere in pytroll
    LOG.debug("Setting 'PPP_CONFIG_DIR' to '%s'", ppp_config_dir)
    os.environ["PPP_CONFIG_DIR"] = self.ppp_config_dir = ppp_config_dir
    self.readers = self.create_reader_instances(
        filenames=filenames,
        base_dir=base_dir,
        reader=reader,
        reader_kwargs=reader_kwargs)
    self.info.update(self._compute_metadata_from_readers())
    self.datasets = DatasetDict()
    self.cpl = CompositorLoader(self.ppp_config_dir)
    comps, mods = self.cpl.load_compositors(self.info['sensor'])
    self.wishlist = set()
    self.dep_tree = DependencyTree(self.readers, comps, mods)
