def load(self, satscene, filename=None, *args, **kwargs):
    """Read data from file(s) and load it into *satscene*.

    When *filename* is given (a path or a list/set/tuple of paths), the
    files are grouped by the start time parsed from their basenames;
    each group is loaded into a deep copy of *satscene* via
    ``load_dataset`` and the resulting scenes are assembled into a
    single segment scene. Without *filename*, loading is delegated
    directly to ``load_dataset`` (which searches configured locations).

    :param satscene: scene object to fill in place.
    :param filename: optional explicit input file(s).
    """
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    options = dict(conf.items(satscene.instrument_name + "-level2",
                              raw=True))
    options["resolution"] = 1000
    options["geofile"] = os.path.join(options["dir"], options["geofile"])
    options.update(kwargs)
    fparser = Parser(options.get("filename"))
    gparser = Parser(options.get("geofile"))

    if filename is not None:
        datasets = {}
        if not isinstance(filename, (list, set, tuple)):
            filename = [filename]
        for fname in filename:
            if fnmatch(os.path.basename(fname), fparser.globify()):
                metadata = fparser.parse(os.path.basename(fname))
                datasets.setdefault(
                    metadata["start_time"], []).append(fname)
            elif fnmatch(os.path.basename(fname), gparser.globify()):
                # BUGFIX: geolocation file names must be parsed with the
                # *geofile* pattern and on the basename — the original
                # used fparser.parse(fname), which applies the wrong
                # pattern to the full path.
                metadata = gparser.parse(os.path.basename(fname))
                datasets.setdefault(
                    metadata["start_time"], []).append(fname)

        scenes = []
        # .items() instead of .iteritems(): same iteration semantics,
        # also valid on Python 3.
        for start_time, dataset in datasets.items():
            newscn = copy.deepcopy(satscene)
            newscn.time_slot = start_time
            self.load_dataset(newscn, filename=dataset, *args, **kwargs)
            scenes.append(newscn)

        if not scenes:
            logger.debug("Looking for files")
            self.load_dataset(satscene, *args, **kwargs)
        else:
            # Stitch the per-granule scenes together in time order.
            entire_scene = assemble_segments(
                sorted(scenes, key=lambda x: x.time_slot))
            satscene.channels = entire_scene.channels
            satscene.area = entire_scene.area
            satscene.orbit = int(entire_scene.orbit)
            satscene.info["orbit_number"] = int(entire_scene.orbit)
    else:
        self.load_dataset(satscene, *args, **kwargs)
def load_dataset(self, satscene, filename=None, *args, **kwargs):
    """Read data from file and load it into *satscene*.

    Locates the level-1b HDF data files (one per resolution) and the
    geolocation file, calibrates the requested channels, attaches
    swath geometry, and trims known dead-detector lines on Aqua/Terra.

    :param satscene: scene to fill; ``satscene.channels_to_load``
        selects the bands to read.
    :param filename: a list/set/tuple of file paths (a full dataset),
        a value enabling the configured single file, or None to search
        according to the configuration file.
    """
    # Positional args are accepted for interface compatibility only.
    del args
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    options = dict(conf.items(satscene.instrument_name + "-level2",
                              raw=True))
    # Default resolution; callers may override through kwargs below.
    options["resolution"] = 1000
    options["geofile"] = os.path.join(options["dir"], options["geofile"])
    options.update(kwargs)
    fparser = Parser(options["filename"])
    gparser = Parser(options["geofile"])

    if isinstance(filename, (list, set, tuple)):
        # We got the entire dataset: sort the inputs into data files
        # (keyed by resolution) and the geolocation file.
        for fname in filename:
            if fnmatch(os.path.basename(fname), fparser.globify()):
                metadata = fparser.parse(os.path.basename(fname))
                resolution = self.res[metadata["resolution"]]
                self.datafiles[resolution] = fname
            elif fnmatch(os.path.basename(fname), gparser.globify()):
                self.geofile = fname
    elif ((filename is not None) and
          fnmatch(os.path.basename(options["filename"]),
                  fparser.globify())):
        # Read just one file; character 5 of the basename encodes the
        # resolution (looked up through self.res).
        logger.debug("Reading from file: " + str(options["filename"]))
        filename = options["filename"]
        resolution = self.res[os.path.basename(filename)[5]]
        self.datafiles[resolution] = filename
    else:
        # Find files according to the configured templates.
        resolution = int(options["resolution"]) or 1000
        for res in [250, 500, 1000]:
            datafile = os.path.join(options['dir'],
                                    options["filename" + str(res)])
            try:
                self.datafiles[res] = get_filename(datafile,
                                                   satscene.time_slot)
            except IOError:
                self.datafiles[res] = None
                logger.warning("Can't find file for resolution %s with template: %s",
                               str(res), datafile)
        try:
            self.geofile = get_filename(options["geofile"],
                                        satscene.time_slot)
        except IOError:
            self.geofile = None
            logger.warning("Can't find geofile with template: %s",
                           options['geofile'])

    # NOTE(review): overwrites any resolution derived in the branches
    # above; a string supplied via kwargs is not coerced to int here,
    # making the `res < resolution` comparison below unreliable — TODO
    # confirm.
    resolution = options["resolution"]
    # NOTE(review): Python-2-style division; under Python 3 this would
    # produce a float core count.
    cores = options.get("cores",
                        max(multiprocessing.cpu_count() / 4, 1))

    # HDF dataset names available at each resolution.
    datadict = {
        1000: ['EV_250_Aggr1km_RefSB',
               'EV_500_Aggr1km_RefSB',
               'EV_1KM_RefSB',
               'EV_1KM_Emissive'],
        500: ['EV_250_Aggr500_RefSB',
              'EV_500_RefSB'],
        250: ['EV_250_RefSB']}

    loaded_bands = []

    # process by dataset, reflective and emissive datasets separately
    resolutions = [250, 500, 1000]
    for res in resolutions:
        if res < resolution:
            # Skip resolutions finer than the one requested.
            continue
        logger.debug("Working on resolution %d", res)
        # NOTE(review): raises KeyError when this resolution was never
        # registered (e.g. only one explicit file was given) — confirm.
        self.filename = self.datafiles[res]
        logger.debug("Using " + str(cores) + " cores for interpolation")
        try:
            self.data = SD(str(self.filename))
        except HDF4Error as err:
            # Missing file (self.filename is None) also lands here.
            logger.warning("Could not load data from " +
                           str(self.filename) + ": " + str(err))
            continue
        datasets = datadict[res]
        for dataset in datasets:
            subdata = self.data.select(dataset)
            band_names = subdata.attributes()["band_names"].split(",")
            if len(satscene.channels_to_load & set(band_names)) > 0:
                # get the relative indices of the desired channels
                indices = [i for i, band in enumerate(band_names)
                           if band in satscene.channels_to_load]
                uncertainty = self.data.select(dataset +
                                               "_Uncert_Indexes")
                # Emissive datasets get brightness-temperature
                # calibration, the others reflectance calibration.
                if dataset.endswith('Emissive'):
                    array = calibrate_tb(
                        subdata, uncertainty, indices, band_names)
                else:
                    array = calibrate_refl(subdata, uncertainty,
                                           indices)
                for (i, idx) in enumerate(indices):
                    if band_names[idx] in loaded_bands:
                        # Already loaded at a finer resolution.
                        continue
                    satscene[band_names[idx]] = array[i]
                    # fix the resolution to match the loaded data.
                    satscene[band_names[idx]].resolution = res
                    loaded_bands.append(band_names[idx])

    # Get the orbit number
    if not satscene.orbit:
        mda = self.data.attributes()["CoreMetadata.0"]
        orbit_idx = mda.index("ORBITNUMBER")
        # Five-digit orbit number at a fixed offset after the tag.
        satscene.orbit = int(mda[orbit_idx + 111:orbit_idx + 116])

    # Get the geolocation
    # if resolution != 1000:
    #     logger.warning("Cannot load geolocation at this resolution (yet).")
    #     return
    for band_name in loaded_bands:
        lon, lat = self.get_lonlat(satscene[band_name].resolution,
                                   satscene.time_slot, cores)
        area = geometry.SwathDefinition(lons=lon, lats=lat)
        satscene[band_name].area = area

    # Trimming out dead sensor lines (detectors) on aqua:
    # (in addition channel 21 is noisy)
    if satscene.satname == "aqua":
        for band in ["6", "27", "36"]:
            if not satscene[band].is_loaded() or \
                    satscene[band].data.mask.all():
                continue
            width = satscene[band].data.shape[1]
            height = satscene[band].data.shape[0]
            # Keep only the rows that are not fully masked.
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                # Nothing fully masked: nothing to trim.
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

    # Trimming out dead sensor lines (detectors) on terra:
    # (in addition channel 27, 30, 34, 35, and 36 are nosiy)
    if satscene.satname == "terra":
        for band in ["29"]:
            if not satscene[band].is_loaded() or \
                    satscene[band].data.mask.all():
                continue
            width = satscene[band].data.shape[1]
            height = satscene[band].data.shape[0]
            # Keep only the rows that are not fully masked.
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

    # Tag each band's swath with an id built from satellite, time
    # slot, data shape and a hash of the data mask.
    for band_name in loaded_bands:
        band_uid = hashlib.sha1(satscene[band_name].data.mask).hexdigest()
        satscene[band_name].area.area_id = ("swath_" +
                                            satscene.fullname + "_" +
                                            str(satscene.time_slot) +
                                            "_" +
                                            str(satscene[
                                                band_name].shape) +
                                            "_" +
                                            str(band_uid))
        satscene[band_name].area_id = satscene[band_name].area.area_id
def load_dataset(self, satscene, filename=None, *args, **kwargs):
    """Read data from file and load it into *satscene*.

    Locates the level-1b HDF data files (one per resolution) and the
    geolocation file, calibrates the requested channels, attaches
    swath geometry, and trims known dead-detector lines on Aqua/Terra.

    NOTE(review): this definition shadows an earlier ``load_dataset``
    in the same module; only this one is effective at runtime.

    :param satscene: scene to fill; ``satscene.channels_to_load``
        selects the bands to read.
    :param filename: a list/set/tuple of file paths (a full dataset),
        a value enabling the configured single file, or None to search
        according to the configuration file.
    """
    # Positional args are accepted for interface compatibility only.
    del args
    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    options = dict(conf.items(satscene.instrument_name + "-level2",
                              raw=True))
    # Default resolution; callers may override through kwargs below.
    options["resolution"] = 1000
    options["geofile"] = os.path.join(options["dir"], options["geofile"])
    options.update(kwargs)
    fparser = Parser(options.get("filename"))
    gparser = Parser(options.get("geofile"))

    if isinstance(filename, (list, set, tuple)):
        # We got the entire dataset: sort the inputs into data files
        # (keyed by resolution) and the geolocation file.
        for fname in filename:
            if fnmatch(os.path.basename(fname), fparser.globify()):
                metadata = fparser.parse(os.path.basename(fname))
                resolution = self.res[metadata["resolution"]]
                self.datafiles[resolution] = fname
            elif fnmatch(os.path.basename(fname), gparser.globify()):
                self.geofile = fname
    elif ((filename is not None) and
          fnmatch(os.path.basename(options["filename"]),
                  fparser.globify())):
        # Read just one file; character 5 of the basename encodes the
        # resolution (looked up through self.res).
        logger.debug("Reading from file: " + str(options["filename"]))
        filename = options["filename"]
        resolution = self.res[os.path.basename(filename)[5]]
        self.datafiles[resolution] = filename

    if not self.datafiles:
        # find files according to config
        logger.debug(
            "Didn't get any valid file as input, looking in defined places")
        resolution = int(options["resolution"]) or 1000
        for res in [250, 500, 1000]:
            datafile = globify(os.path.join(options['dir'],
                                            options["filename"]),
                               {'resolution': self.inv_res[res],
                                'start_time': satscene.time_slot})
            try:
                self.datafiles[res] = check_filename(datafile)
            except IOError:
                self.datafiles[res] = None
                logger.warning("Can't find file for resolution %s with template: %s",
                               str(res), datafile)
        try:
            self.geofile = check_filename(
                globify(options["geofile"],
                        {'start_time': satscene.time_slot}))
        except IOError:
            self.geofile = None
            logger.warning("Can't find geofile with template: %s",
                           options['geofile'])

    # BUGFIX: coerce to int so the `res < resolution` comparison below
    # stays numeric even when the resolution came through kwargs as a
    # string (Python 2 would silently compare int < str).
    resolution = int(options["resolution"])
    # BUGFIX: floor division keeps *cores* an int on Python 3 as well
    # (identical result on Python 2).
    cores = options.get("cores",
                        max(multiprocessing.cpu_count() // 4, 1))

    # HDF dataset names available at each resolution.
    datadict = {
        1000: ['EV_250_Aggr1km_RefSB',
               'EV_500_Aggr1km_RefSB',
               'EV_1KM_RefSB',
               'EV_1KM_Emissive'],
        500: ['EV_250_Aggr500_RefSB',
              'EV_500_RefSB'],
        250: ['EV_250_RefSB']}

    loaded_bands = []

    # process by dataset, reflective and emissive datasets separately
    resolutions = [250, 500, 1000]
    for res in resolutions:
        if res < resolution:
            # Skip resolutions finer than the one requested.
            continue
        logger.debug("Working on resolution %d", res)
        # BUGFIX: .get() instead of [res] — when an explicit filename
        # registered only one resolution, the other keys are absent and
        # indexing would raise KeyError. A missing/None file falls
        # through to the HDF4Error handler below, as before.
        self.filename = self.datafiles.get(res)
        logger.debug("Using " + str(cores) + " cores for interpolation")
        try:
            self.data = SD(str(self.filename))
        except HDF4Error as err:
            logger.warning("Could not load data from " +
                           str(self.filename) + ": " + str(err))
            continue
        datasets = datadict[res]
        for dataset in datasets:
            subdata = self.data.select(dataset)
            band_names = subdata.attributes()["band_names"].split(",")
            if len(satscene.channels_to_load & set(band_names)) > 0:
                # get the relative indices of the desired channels
                indices = [i for i, band in enumerate(band_names)
                           if band in satscene.channels_to_load]
                uncertainty = self.data.select(dataset +
                                               "_Uncert_Indexes")
                # Emissive datasets get brightness-temperature
                # calibration, the others reflectance calibration.
                if dataset.endswith('Emissive'):
                    array = calibrate_tb(
                        subdata, uncertainty, indices, band_names)
                else:
                    array = calibrate_refl(subdata, uncertainty,
                                           indices)
                for (i, idx) in enumerate(indices):
                    if band_names[idx] in loaded_bands:
                        # Already loaded at a finer resolution.
                        continue
                    satscene[band_names[idx]] = array[i]
                    # fix the resolution to match the loaded data.
                    satscene[band_names[idx]].resolution = res
                    loaded_bands.append(band_names[idx])

    # Get the orbit number
    if not satscene.orbit:
        mda = self.data.attributes()["CoreMetadata.0"]
        orbit_idx = mda.index("ORBITNUMBER")
        # Five-digit orbit number at a fixed offset after the tag.
        satscene.orbit = int(mda[orbit_idx + 111:orbit_idx + 116])

    # Get the geolocation
    # if resolution != 1000:
    #     logger.warning("Cannot load geolocation at this resolution (yet).")
    #     return
    for band_name in loaded_bands:
        lon, lat = self.get_lonlat(
            satscene[band_name].resolution, satscene.time_slot, cores)
        area = geometry.SwathDefinition(lons=lon, lats=lat)
        satscene[band_name].area = area

    # Trimming out dead sensor lines (detectors) on aqua:
    # (in addition channel 21 is noisy)
    if satscene.satname == "aqua":
        for band in ["6", "27", "36"]:
            if not satscene[band].is_loaded() or \
                    satscene[band].data.mask.all():
                continue
            width = satscene[band].data.shape[1]
            height = satscene[band].data.shape[0]
            # Keep only the rows that are not fully masked.
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                # Nothing fully masked: nothing to trim.
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

    # Trimming out dead sensor lines (detectors) on terra:
    # (in addition channel 27, 30, 34, 35, and 36 are nosiy)
    if satscene.satname == "terra":
        for band in ["29"]:
            if not satscene[band].is_loaded() or \
                    satscene[band].data.mask.all():
                continue
            width = satscene[band].data.shape[1]
            height = satscene[band].data.shape[0]
            # Keep only the rows that are not fully masked.
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

    # Tag each band's swath with an id built from satellite, time
    # slot, data shape and a hash of the data mask.
    for band_name in loaded_bands:
        band_uid = hashlib.sha1(satscene[band_name].data.mask).hexdigest()
        satscene[band_name].area.area_id = ("swath_" +
                                            satscene.fullname + "_" +
                                            str(satscene.time_slot) +
                                            "_" +
                                            str(satscene[
                                                band_name].shape) +
                                            "_" +
                                            str(band_uid))
        satscene[band_name].area_id = satscene[band_name].area.area_id