Ejemplo n.º 1
0
    def load(self, satscene, filename=None, *args, **kwargs):
        """Load data into *satscene*, optionally from explicitly given files.

        The ``<instrument_name>-level2`` section of the satellite's
        configuration file provides the ``filename`` and ``geofile``
        patterns; *kwargs* override the configured options.

        When *filename* is given (a single path or a list/set/tuple of
        paths), the files whose base name matches the data or geolocation
        pattern are grouped by the ``start_time`` parsed from their names.
        Each group is loaded into a deep copy of *satscene* through
        :meth:`load_dataset`, the copies are assembled in time-slot order
        with ``assemble_segments`` and the resulting channels, area and
        orbit are copied back onto *satscene*.

        When *filename* is None, or none of the given files match a
        pattern, :meth:`load_dataset` is called without a file list.
        """
        conf = ConfigParser()
        conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
        options = dict(conf.items(satscene.instrument_name + "-level2",
                                  raw=True))
        options["resolution"] = 1000
        options["geofile"] = os.path.join(options["dir"], options["geofile"])
        options.update(kwargs)

        fparser = Parser(options.get("filename"))
        gparser = Parser(options.get("geofile"))

        if filename is None:
            self.load_dataset(satscene, *args, **kwargs)
            return

        if not isinstance(filename, (list, set, tuple)):
            filename = [filename]

        # The glob patterns do not depend on the file being inspected.
        data_glob = fparser.globify()
        geo_glob = gparser.globify()

        # Group the recognised files by the start time found in their names.
        datasets = {}
        for fname in filename:
            basename = os.path.basename(fname)
            if fnmatch(basename, data_glob):
                metadata = fparser.parse(basename)
            elif fnmatch(basename, geo_glob):
                # Parse with the parser (and the string) that matched the
                # glob; the data-file parser does not describe geofiles.
                metadata = gparser.parse(basename)
            else:
                continue
            datasets.setdefault(metadata["start_time"], []).append(fname)

        scenes = []
        for start_time, dataset in datasets.items():
            newscn = copy.deepcopy(satscene)
            newscn.time_slot = start_time
            self.load_dataset(newscn, filename=dataset, *args, **kwargs)
            scenes.append(newscn)

        if not scenes:
            # None of the given files was recognised.
            logger.debug("Looking for files")
            self.load_dataset(satscene, *args, **kwargs)
            return

        entire_scene = assemble_segments(
            sorted(scenes, key=lambda scn: scn.time_slot))
        satscene.channels = entire_scene.channels
        satscene.area = entire_scene.area
        satscene.orbit = int(entire_scene.orbit)
        satscene.info["orbit_number"] = int(entire_scene.orbit)
Ejemplo n.º 2
0
    def load_dataset(self, satscene, filename=None, *args, **kwargs):
        """Read data from file and load it into *satscene*.

        Options come from the ``<instrument_name>-level2`` section of the
        satellite's configuration file and are overridden by *kwargs*
        (``resolution`` defaults to 1000).

        *filename* may be:

        - a list, set or tuple of paths: files matching the data pattern are
          registered in ``self.datafiles`` by resolution, a file matching the
          geolocation pattern is stored in ``self.geofile``;
        - a single path matching the data pattern: only that file is
          registered;
        - anything else (including None): data and geolocation files are
          looked up from the configured templates for ``satscene.time_slot``.

        For every resolution not finer than the requested one, the channels
        listed in ``satscene.channels_to_load`` are calibrated and stored in
        *satscene*, then given a swath area and an ``area_id``. Extra
        positional arguments are ignored.
        """
        del args
        conf = ConfigParser()
        conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
        options = dict(conf.items(satscene.instrument_name + "-level2",
                                  raw=True))
        options["resolution"] = 1000
        options["geofile"] = os.path.join(options["dir"], options["geofile"])
        options.update(kwargs)

        fparser = Parser(options["filename"])
        gparser = Parser(options["geofile"])
        data_glob = fparser.globify()

        if isinstance(filename, (list, set, tuple)):
            # we got the entire dataset.
            geo_glob = gparser.globify()
            for fname in filename:
                basename = os.path.basename(fname)
                if fnmatch(basename, data_glob):
                    metadata = fparser.parse(basename)
                    self.datafiles[self.res[metadata["resolution"]]] = fname
                elif fnmatch(basename, geo_glob):
                    self.geofile = fname
        elif (filename is not None and
              fnmatch(os.path.basename(filename), data_glob)):
            # read just one file: the file handed in by the caller, not the
            # configured template.
            logger.debug("Reading from file: %s", filename)
            # The 6th character of the base name is the key into self.res.
            self.datafiles[self.res[os.path.basename(filename)[5]]] = filename
        else:
            # find files according to config
            for res in [250, 500, 1000]:
                datafile = os.path.join(options['dir'],
                                        options["filename" + str(res)])
                try:
                    self.datafiles[res] = get_filename(datafile,
                                                       satscene.time_slot)
                except IOError:
                    self.datafiles[res] = None
                    logger.warning("Can't find file for resolution %s with template: %s",
                                   str(res), datafile)

            try:
                self.geofile = get_filename(options["geofile"],
                                            satscene.time_slot)
            except IOError:
                self.geofile = None
                logger.warning("Can't find geofile with template: %s",
                               options['geofile'])

        # Options overridden through kwargs or read from the config may be
        # strings; the comparisons below need numbers.
        resolution = int(options["resolution"])
        cores = int(options.get("cores",
                                max(multiprocessing.cpu_count() // 4, 1)))

        # Science datasets to read for each resolution.
        datadict = {
            1000: ['EV_250_Aggr1km_RefSB',
                   'EV_500_Aggr1km_RefSB',
                   'EV_1KM_RefSB',
                   'EV_1KM_Emissive'],
            500: ['EV_250_Aggr500_RefSB',
                  'EV_500_RefSB'],
            250: ['EV_250_RefSB']}

        loaded_bands = []

        # process by dataset, reflective and emissive datasets separately
        for res in [250, 500, 1000]:
            if res < resolution:
                continue
            logger.debug("Working on resolution %d", res)

            # Not every resolution has to be present in self.datafiles, e.g.
            # when only some files were passed in explicitly.
            self.filename = self.datafiles.get(res)
            if self.filename is None:
                logger.debug("No data file available for resolution %d", res)
                continue

            logger.debug("Using %s cores for interpolation", cores)

            try:
                self.data = SD(str(self.filename))
            except HDF4Error as err:
                logger.warning("Could not load data from %s: %s",
                               self.filename, err)
                continue

            for dataset in datadict[res]:
                subdata = self.data.select(dataset)
                band_names = subdata.attributes()["band_names"].split(",")
                if not satscene.channels_to_load & set(band_names):
                    continue
                # get the relative indices of the desired channels
                indices = [i for i, band in enumerate(band_names)
                           if band in satscene.channels_to_load]
                uncertainty = self.data.select(dataset + "_Uncert_Indexes")
                if dataset.endswith('Emissive'):
                    array = calibrate_tb(
                        subdata, uncertainty, indices, band_names)
                else:
                    array = calibrate_refl(subdata, uncertainty, indices)
                for (i, idx) in enumerate(indices):
                    band_name = band_names[idx]
                    # A band already loaded at a finer resolution is kept.
                    if band_name in loaded_bands:
                        continue
                    satscene[band_name] = array[i]
                    # fix the resolution to match the loaded data.
                    satscene[band_name].resolution = res
                    loaded_bands.append(band_name)

        # Get the orbit number
        if not satscene.orbit:
            mda = self.data.attributes()["CoreMetadata.0"]
            orbit_idx = mda.index("ORBITNUMBER")
            # The value is read at a fixed character offset after the key.
            satscene.orbit = int(mda[orbit_idx + 111:orbit_idx + 116])

        # Get the geolocation
        for band_name in loaded_bands:
            lon, lat = self.get_lonlat(
                satscene[band_name].resolution, satscene.time_slot, cores)
            satscene[band_name].area = geometry.SwathDefinition(lons=lon,
                                                                lats=lat)

        # Trimming out dead sensor lines (detectors):
        # on aqua, channel 21 is noisy in addition;
        # on terra, channels 27, 30, 34, 35, and 36 are noisy in addition.
        dead_line_bands = {"aqua": ["6", "27", "36"],
                           "terra": ["29"]}
        for band in dead_line_bands.get(satscene.satname, []):
            if not satscene[band].is_loaded() or satscene[band].data.mask.all():
                continue
            height, width = satscene[band].data.shape[:2]
            # Keep the lines that have at least one unmasked pixel.
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

        # Build an area id from the scene name, time slot, band shape and a
        # hash of the band's mask.
        for band_name in loaded_bands:
            band_uid = hashlib.sha1(satscene[band_name].data.mask).hexdigest()
            satscene[band_name].area.area_id = (
                "swath_" + satscene.fullname + "_"
                + str(satscene.time_slot) + "_"
                + str(satscene[band_name].shape) + "_"
                + str(band_uid))
            satscene[band_name].area_id = satscene[band_name].area.area_id
Ejemplo n.º 3
0
    def load_dataset(self, satscene, filename=None, *args, **kwargs):
        """Read data from file and load it into *satscene*.

        Options come from the ``<instrument_name>-level2`` section of the
        satellite's configuration file and are overridden by *kwargs*
        (``resolution`` defaults to 1000).

        *filename* may be a list, set or tuple of paths (files matching the
        data pattern are registered in ``self.datafiles`` by resolution, a
        file matching the geolocation pattern is stored in ``self.geofile``)
        or a single path matching the data pattern. If ``self.datafiles`` is
        still empty afterwards, data and geolocation files are looked up from
        the configured templates for ``satscene.time_slot``.

        For every resolution not finer than the requested one, the channels
        listed in ``satscene.channels_to_load`` are calibrated and stored in
        *satscene*, then given a swath area and an ``area_id``. Extra
        positional arguments are ignored.
        """
        del args
        conf = ConfigParser()
        conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
        options = dict(conf.items(satscene.instrument_name + "-level2",
                                  raw=True))
        options["resolution"] = 1000
        options["geofile"] = os.path.join(options["dir"], options["geofile"])
        options.update(kwargs)

        fparser = Parser(options.get("filename"))
        gparser = Parser(options.get("geofile"))
        data_glob = fparser.globify()

        if isinstance(filename, (list, set, tuple)):
            # we got the entire dataset.
            geo_glob = gparser.globify()
            for fname in filename:
                basename = os.path.basename(fname)
                if fnmatch(basename, data_glob):
                    metadata = fparser.parse(basename)
                    self.datafiles[self.res[metadata["resolution"]]] = fname
                elif fnmatch(basename, geo_glob):
                    self.geofile = fname
        elif (filename is not None and
              fnmatch(os.path.basename(filename), data_glob)):
            # read just one file: the file handed in by the caller, not the
            # configured template.
            logger.debug("Reading from file: %s", filename)
            # The 6th character of the base name is the key into self.res.
            self.datafiles[self.res[os.path.basename(filename)[5]]] = filename

        if not self.datafiles:
            # find files according to config
            logger.debug(
                "Didn't get any valid file as input, looking in defined places")

            for res in [250, 500, 1000]:
                datafile = globify(os.path.join(options['dir'],
                                                options["filename"]),
                                   {'resolution': self.inv_res[res],
                                    'start_time': satscene.time_slot})
                try:
                    self.datafiles[res] = check_filename(datafile)
                except IOError:
                    self.datafiles[res] = None
                    logger.warning("Can't find file for resolution %s with template: %s",
                                   str(res), datafile)

            try:
                self.geofile = check_filename(
                    globify(options["geofile"],
                            {'start_time': satscene.time_slot}))
            except IOError:
                self.geofile = None
                logger.warning("Can't find geofile with template: %s",
                               options['geofile'])

        # Options overridden through kwargs or read from the config may be
        # strings; the comparisons below need numbers.
        resolution = int(options["resolution"])
        cores = int(options.get("cores",
                                max(multiprocessing.cpu_count() // 4, 1)))

        # Science datasets to read for each resolution.
        datadict = {
            1000: ['EV_250_Aggr1km_RefSB',
                   'EV_500_Aggr1km_RefSB',
                   'EV_1KM_RefSB',
                   'EV_1KM_Emissive'],
            500: ['EV_250_Aggr500_RefSB',
                  'EV_500_RefSB'],
            250: ['EV_250_RefSB']}

        loaded_bands = []

        # process by dataset, reflective and emissive datasets separately
        for res in [250, 500, 1000]:
            if res < resolution:
                continue
            logger.debug("Working on resolution %d", res)

            # Not every resolution has to be present in self.datafiles, e.g.
            # when only some files were passed in explicitly.
            self.filename = self.datafiles.get(res)
            if self.filename is None:
                logger.debug("No data file available for resolution %d", res)
                continue

            logger.debug("Using %s cores for interpolation", cores)

            try:
                self.data = SD(str(self.filename))
            except HDF4Error as err:
                logger.warning("Could not load data from %s: %s",
                               self.filename, err)
                continue

            for dataset in datadict[res]:
                subdata = self.data.select(dataset)
                band_names = subdata.attributes()["band_names"].split(",")
                if not satscene.channels_to_load & set(band_names):
                    continue
                # get the relative indices of the desired channels
                indices = [i for i, band in enumerate(band_names)
                           if band in satscene.channels_to_load]
                uncertainty = self.data.select(dataset + "_Uncert_Indexes")
                if dataset.endswith('Emissive'):
                    array = calibrate_tb(
                        subdata, uncertainty, indices, band_names)
                else:
                    array = calibrate_refl(subdata, uncertainty, indices)
                for (i, idx) in enumerate(indices):
                    band_name = band_names[idx]
                    # A band already loaded at a finer resolution is kept.
                    if band_name in loaded_bands:
                        continue
                    satscene[band_name] = array[i]
                    # fix the resolution to match the loaded data.
                    satscene[band_name].resolution = res
                    loaded_bands.append(band_name)

        # Get the orbit number
        if not satscene.orbit:
            mda = self.data.attributes()["CoreMetadata.0"]
            orbit_idx = mda.index("ORBITNUMBER")
            # The value is read at a fixed character offset after the key.
            satscene.orbit = int(mda[orbit_idx + 111:orbit_idx + 116])

        # Get the geolocation
        for band_name in loaded_bands:
            lon, lat = self.get_lonlat(
                satscene[band_name].resolution, satscene.time_slot, cores)
            satscene[band_name].area = geometry.SwathDefinition(lons=lon,
                                                                lats=lat)

        # Trimming out dead sensor lines (detectors):
        # on aqua, channel 21 is noisy in addition;
        # on terra, channels 27, 30, 34, 35, and 36 are noisy in addition.
        dead_line_bands = {"aqua": ["6", "27", "36"],
                           "terra": ["29"]}
        for band in dead_line_bands.get(satscene.satname, []):
            if not satscene[band].is_loaded() or satscene[band].data.mask.all():
                continue
            height, width = satscene[band].data.shape[:2]
            # Keep the lines that have at least one unmasked pixel.
            indices = satscene[band].data.mask.sum(1) < width
            if indices.sum() == height:
                continue
            satscene[band] = satscene[band].data[indices, :]
            satscene[band].area = geometry.SwathDefinition(
                lons=satscene[band].area.lons[indices, :],
                lats=satscene[band].area.lats[indices, :])

        # Build an area id from the scene name, time slot, band shape and a
        # hash of the band's mask.
        for band_name in loaded_bands:
            band_uid = hashlib.sha1(satscene[band_name].data.mask).hexdigest()
            satscene[band_name].area.area_id = (
                "swath_" + satscene.fullname + "_"
                + str(satscene.time_slot) + "_"
                + str(satscene[band_name].shape) + "_"
                + str(band_uid))
            satscene[band_name].area_id = satscene[band_name].area.area_id