Code example #1
def get_metadata(fname):
    """Parse metadata from the file."""
    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        res = trigger.fix_start_end_time(res)

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

    return res
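
Every get_metadata variant in this listing leans on trollsift's Parser for both validate and parse. A minimal sketch of that contract, reusing the old EARS-NWC pattern and sample filename shown in code example #2 below; the result noted in the comment is what trollsift would typically return:

from trollsift import Parser

# Pattern and filename borrowed from code example #2 (old EARS-NWC naming).
pattern = ("{product:s}_{starttime:%Y%m%d_%H%M}00_{platform_name:s}.h5"
           "{compression:s}")
fname = "ctth_20130910_205300_metopb.h5.bz2"

parser = Parser(pattern)
if parser.validate(fname):      # True when the filename matches the pattern
    res = parser.parse(fname)
    # res -> {'product': 'ctth', 'starttime': datetime(2013, 9, 10, 20, 53),
    #         'platform_name': 'metopb', 'compression': '.bz2'}
    print(res["product"], res["starttime"], res["platform_name"])
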
Code example #2
File: nc_pps_l2.py Project: tparker-usgs/mpop
def extract_filenames_in_time_window(file_list, starttime, endtime):
    """Extract the filenames with time inside the time interval specified.
    NB! Only tested for EARS-NWC granules. This does not support assembling
    several locally received full swaths"""

    # New EARS-NWC filenames:
    # Ex.:
    # W_XX-EUMETSAT-Darmstadt,SING+LEV+SAT,NOAA19+CT_C_EUMS_20150819124700_\
    #  33643.nc.bz2
    pnew = Parser(EARS_PPS_FILE_MASK)

    # Old EARS-NWC filenames:
    # Ex.:
    # ctth_20130910_205300_metopb.h5.bz2
    pold = Parser("{product:s}_{starttime:%Y%m%d_%H%M}00_{platform_name:s}.h5"
                  "{compression:s}")

    plocal = Parser(LOCAL_PPS_FILE_MASK)

    valid_filenames = []
    valid_times = []
    LOG.debug("Time window: (%s, %s)", str(starttime), str(endtime))
    for fname in file_list:
        try:
            data = pnew.parse(os.path.basename(fname))
        except ValueError:
            try:
                data = pold.parse(os.path.basename(fname))
            except ValueError:
                data = plocal.parse(os.path.basename(fname))

        if (data['starttime'] >= starttime and
                data['starttime'] < endtime):
            valid_filenames.append(fname)
            valid_times.append(data['starttime'])
            LOG.debug("Start time %s inside window", str(data['starttime']))

    # Can we rely on the files being sorted according to time?
    # Sort the filenames according to time:
    vtimes = np.array(valid_times)
    idx = np.argsort(vtimes)
    vfiles = np.array(valid_filenames)
    return np.take(vfiles, idx).tolist()
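
The numpy round-trip at the end exists only to sort the filenames by their start times. Assuming nothing else needs the arrays, a stdlib-only equivalent could look like this sketch (not the project's code):

def sort_filenames_by_time(valid_filenames, valid_times):
    """Return the filenames ordered by their corresponding start times."""
    pairs = sorted(zip(valid_times, valid_filenames), key=lambda pair: pair[0])
    return [fname for _, fname in pairs]
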
Code example #3
def _read_cf_from_string_export(cls, blob):
    """Read blob as a string created by `to_cf`."""
    pattern = "{central:f} {unit:s} ({min:f}-{max:f} {unit2:s})"
    from trollsift import Parser
    parser = Parser(pattern)
    res_dict = parser.parse(blob)
    res_dict.pop('unit2')
    obj = cls(**res_dict)
    return obj
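
The pattern above decodes strings written by to_cf, e.g. a wavelength-range export. A hedged, standalone illustration; the blob below is invented but representative (the numbers and the "um" unit are assumptions):

from trollsift import Parser

# Invented example blob in the shape the pattern expects.
blob = "0.635 um (0.58-0.68 um)"
parser = Parser("{central:f} {unit:s} ({min:f}-{max:f} {unit2:s})")
res = parser.parse(blob)
# res -> {'central': 0.635, 'unit': 'um', 'min': 0.58, 'max': 0.68, 'unit2': 'um'}
res.pop("unit2")   # 'unit' and 'unit2' carry the same string; keep only one
print(res)
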
Code example #4
File: nc_pps_l2.py Project: junjie2008v/mpop
def extract_filenames_in_time_window(file_list, starttime, endtime):
    """Extract the filenames with time inside the time interval specified.
    NB! Only tested for EARS-NWC granules. This does not support assembling
    several locally received full swaths"""

    # New EARS-NWC filenames:
    # Ex.:
    # W_XX-EUMETSAT-Darmstadt,SING+LEV+SAT,NOAA19+CT_C_EUMS_20150819124700_\
    #  33643.nc.bz2
    pnew = Parser(EARS_PPS_FILE_MASK)

    # Old EARS-NWC filenames:
    # Ex.:
    # ctth_20130910_205300_metopb.h5.bz2
    pold = Parser("{product:s}_{starttime:%Y%m%d_%H%M}00_{platform_name:s}.h5"
                  "{compression:s}")

    plocal = Parser(LOCAL_PPS_FILE_MASK)

    valid_filenames = []
    valid_times = []
    LOG.debug("Time window: (%s, %s)", str(starttime), str(endtime))
    for fname in file_list:
        try:
            data = pnew.parse(os.path.basename(fname))
        except ValueError:
            try:
                data = pold.parse(os.path.basename(fname))
            except ValueError:
                data = plocal.parse(os.path.basename(fname))

        if (data['starttime'] >= starttime and data['starttime'] < endtime):
            valid_filenames.append(fname)
            valid_times.append(data['starttime'])
            LOG.debug("Start time %s inside window", str(data['starttime']))

    # Can we rely on the files being sorted according to time?
    # Sort the filenames according to time:
    vtimes = np.array(valid_times)
    idx = np.argsort(vtimes)
    vfiles = np.array(valid_filenames)
    return np.take(vfiles, idx).tolist()
Code example #5
def _match_files_to_pattern(files, path, pattern):
    if pattern is not None:
        parser = Parser(posixpath.join(path, pattern))
        matching_files = []
        for file in files:
            try:
                metadata = parser.parse(file['name'])
                file['metadata'] = metadata
                matching_files.append(file)
            except ValueError:
                pass
        return matching_files
    return files
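
A hedged usage sketch for the helper above; the file dicts, path, and pattern are invented, but they follow the calling convention the function implies (each entry needs at least a 'name' key holding the full path):

# Hypothetical listing; only the first entry matches the pattern.
files = [
    {"name": "/data/in/ctth_20130910_205300_metopb.h5.bz2"},
    {"name": "/data/in/README.txt"},
]
matched = _match_files_to_pattern(
    files,
    path="/data/in",
    pattern="{product}_{start_time:%Y%m%d_%H%M%S}_{platform_name}.h5.bz2",
)
# matched keeps only the first entry, now carrying a 'metadata' dict with
# 'product', 'start_time' and 'platform_name' parsed from its name.
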
Code example #6
def get_metadata(fname):
    """Parse metadata from the file.
    """

    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

    return res
Code example #7
File: gatherer.py Project: tparker-usgs/trollduction
def get_metadata(fname):
    """Parse metadata from the file.
    """

    res = None
    for section in CONFIG.sections():
        try:
            parser = Parser(CONFIG.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(CONFIG.items(section)))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

    return res
Code example #8
File: _base.py Project: pytroll/pytroll-collectors
    def _get_metadata(self, fname):
        """Parse metadata from the file."""
        parser = Parser(self._config_items["pattern"])

        res = parser.parse(fname)
        res.update(dict(self._config_items))

        for key in ["watcher", "pattern", "timeliness", "regions"]:
            res.pop(key, None)

        res = fix_start_end_time(res)

        if ("sensor" in res) and ("," in res["sensor"]):
            res["sensor"] = res["sensor"].split(",")

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)

        return res
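
Compared with code examples #6 and #7, the inline date arithmetic has been factored out into fix_start_end_time. A condensed sketch of what that helper presumably does, reconstructed from those inline versions (not the actual pytroll-collectors implementation):

from datetime import datetime, timedelta

def fix_start_end_time(res):
    """Normalize start/end times in a parsed-metadata dict (sketch)."""
    if "duration" in res and "end_time" not in res:
        res["end_time"] = res["start_time"] + timedelta(seconds=int(res["duration"]))
    if "start_date" in res:
        res["start_time"] = datetime.combine(res["start_date"].date(),
                                             res["start_time"].time())
        if "end_date" not in res:
            res["end_date"] = res["start_date"]
        del res["start_date"]
    if "end_date" in res:
        res["end_time"] = datetime.combine(res["end_date"].date(),
                                           res["end_time"].time())
        del res["end_date"]
    # Roll the end time forward if the granule crosses midnight.
    while res["start_time"] > res["end_time"]:
        res["end_time"] += timedelta(days=1)
    res.pop("duration", None)
    return res
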
Code example #9
def get_metadata_from_filename(infile_pattern, filepath):
    """From the filename and its pattern get basic metadata of the satellite observations."""
    p__ = Parser(infile_pattern)
    fname = os.path.basename(filepath)
    try:
        res = p__.parse(fname)
    except ValueError:
        # Do something!
        return None

    # Fix the end time:
    endtime = datetime(res['start_time'].year, res['start_time'].month,
                       res['start_time'].day, res['end_hour'].hour,
                       res['end_hour'].minute, res['end_hour'].second)
    if endtime < res['start_time']:
        endtime = endtime + timedelta(days=1)

    res['end_time'] = endtime

    return res
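
A hedged usage sketch for the function above; the pattern and path are invented but supply the two fields the function actually uses, a 'start_time' datetime and an 'end_hour' time-of-day:

# Hypothetical pattern and file path, for illustration only.
infile_pattern = ("S_NWC_CT_{platform_name}_{orbit:05d}_"
                  "{start_time:%Y%m%dT%H%M%S}Z_{end_hour:%H%M%S}Z.nc")
filepath = "/data/S_NWC_CT_noaa19_33643_20150819T235500Z_000700Z.nc"

res = get_metadata_from_filename(infile_pattern, filepath)
# res['start_time'] -> 2015-08-19 23:55:00
# res['end_time']   -> 2015-08-20 00:07:00 (rolled over to the next day because
#                      the parsed end hour is earlier than the start time)
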
Code example #10
File: gatherer.py Project: ch-k/trollduction
def get_metadata(fname):
    res = None
    for section in config.sections():
        if section == "default":
            continue
        try:
            parser = Parser(config.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(config.items(section)))

        for key in ["watcher", "pattern", "timeliness"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)
    return res
Code example #11
def get_metadata(fname):
    res = None
    for section in config.sections():
        if section == "default":
            continue
        try:
            parser = Parser(config.get(section, "pattern"))
        except NoOptionError:
            continue
        if not parser.validate(fname):
            continue
        res = parser.parse(fname)
        res.update(dict(config.items(section)))

        for key in ["watcher", "pattern", "timeliness"]:
            res.pop(key, None)

        if "duration" in res and "end_time" not in res:
            res["end_time"] = (res["start_time"] +
                               timedelta(seconds=int(res["duration"])))
        if "start_date" in res:
            res["start_time"] = datetime.combine(res["start_date"].date(),
                                                 res["start_time"].time())
            if "end_date" not in res:
                res["end_date"] = res["start_date"]
            del res["start_date"]
        if "end_date" in res:
            res["end_time"] = datetime.combine(res["end_date"].date(),
                                               res["end_time"].time())
            del res["end_date"]

        while res["start_time"] > res["end_time"]:
            res["end_time"] += timedelta(days=1)

        if "duration" in res:
            del res["duration"]

        res["uri"] = fname
        res["filename"] = os.path.basename(fname)
    return res
Code example #12
File: nc_pps_l2.py Project: tparker-usgs/mpop
class PPSReader(Reader):

    """Reader class for PPS files"""
    pformat = "nc_pps_l2"

    def __init__(self, *args, **kwargs):
        Reader.__init__(self, *args, **kwargs)
        # Source of the data, 'local' or 'ears'
        self._source = None
        # Parser for getting info from the file names
        self._parser = None
        # Satellite config
        self._config = None
        # Location of geolocation files, required for 'local' products
        self._cloud_product_geodir = None
        # Name of the product having geolocation for 'local' products
        self._geolocation_product_name = None

    def _read_config(self, sat_name, instrument_name):
        '''Read config for the satellite'''

        if self._config:
            return

        self._config = ConfigParser()
        configfile = os.path.join(CONFIG_PATH, sat_name + ".cfg")
        LOG.debug("Read configfile %s", configfile)
        self._config.read(configfile)

        try:
            self._cloud_product_geodir = \
                self._config.get(instrument_name + "-level3",
                                 "cloud_product_geodir",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            pass

        LOG.debug("cloud_product_geodir = %s", self._cloud_product_geodir)

        try:
            self._geolocation_product_name = \
                self._config.get(instrument_name + "-level3",
                                 "geolocation_product_name",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            if self._source != 'ears':
                LOG.warning("No geolocation product name given in config, "
                            "using default: %s", GEO_PRODUCT_NAME_DEFAULT)
                self._geolocation_product_name = GEO_PRODUCT_NAME_DEFAULT

    def _determine_prod_and_geo_files(self, prodfilenames):
        """From the list of product files and the products to load determine the
        product files and the geolocation files that will be considered when
        reading the data

        """

        # geofiles4product is a dict listing all geo-locations files applicable
        # for each product.
        # prodfiles4product is a dict listing all product files for a given
        # product name

        prodfiles4product = {}
        geofiles4product = {}
        if prodfilenames:
            if not isinstance(prodfilenames, (list, set, tuple)):
                prodfilenames = [prodfilenames]
            for fname in prodfilenames:
                # Only standard NWCSAF/PPS and EARS-NWC naming accepted!
                # No support for old file names (< PPSv2014)
                if (os.path.basename(fname).startswith("S_NWC") or
                        os.path.basename(fname).startswith("W_XX-EUMETSAT")):
                    if not self._parser:
                        if os.path.basename(fname).startswith("S_NWC"):
                            self._source = 'local'
                            self._parser = Parser(LOCAL_PPS_FILE_MASK)
                        else:
                            self._source = 'ears'
                            self._parser = Parser(EARS_PPS_FILE_MASK)
                else:
                    LOG.info("Unrecognized NWCSAF/PPS file: %s", fname)
                    continue

                parse_info = self._parser.parse(os.path.basename(fname))
                prodname = parse_info['product']

                if prodname not in prodfiles4product:
                    prodfiles4product[prodname] = []

                prodfiles4product[prodname].append(fname)

            # Assemble geolocation information
            if self._source == 'ears':
                # For EARS data, the files have geolocation in themselves
                for prodname, fnames in prodfiles4product.iteritems():
                    geofiles4product[prodname] = fnames
            else:
                # For locally processed data, use the geolocation from
                # the product defined in config

                if self._geolocation_product_name in prodfiles4product:
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = \
                            prodfiles4product[self._geolocation_product_name]
                else:
                    # If the product files with geolocation are not used,
                    # assume that they are still available on the disk.

                    if self._cloud_product_geodir is None:
                        LOG.warning("Config option 'cloud_product_geodir' is not "
                                    "available! Assuming same directory as "
                                    "products.")
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = []
                        for fname in prodfiles4product[prodname]:
                            directory = self._cloud_product_geodir or \
                                os.path.abspath(fname)
                            parse_info = \
                                self._parser.parse(os.path.basename(fname))
                            fname = fname.replace(parse_info['product'],
                                                  self._geolocation_product_name)
                            fname = os.path.join(directory, fname)
                            geofiles4product[prodname].append(fname)

            # Check that each product file has a corresponding geolocation
            # file:
            '''
            if self._geolocation_product_name:
                for prod in products:
                    if prod not in geofiles4product:
                        LOG.error("No product name %s in dict "
                                  "geofiles4product!",
                                  prod)
                        continue
                    if prod not in prodfiles4product:
                        LOG.error("No product name %s in dict "
                                  "prodfiles4product!",
                                  prod)
                        continue
                    if len(geofiles4product[prod]) != \
                       len(prodfiles4product[prod]):
                        LOG.error("Mismatch in number of product files and "
                                  "matching geolocation files!")
            '''

        return prodfiles4product, geofiles4product

    def load(self, satscene, **kwargs):
        """Read data from file and load it into *satscene*.
        """

        prodfilenames = kwargs.get('filename')
        time_interval = kwargs.get('time_interval')
        if prodfilenames and time_interval:
            LOG.warning("You have specified both a list of files " +
                        "and a time interval")
            LOG.warning("Specifying a time interval will only take effect " +
                        "if no files are specified")
            time_interval = None

        products = satscene.channels_to_load & set(PPS_PRODUCTS)
        if len(products) == 0:
            LOG.debug("No PPS cloud products to load, abort")
            return

        self._read_config(satscene.fullname, satscene.instrument_name)

        LOG.info("Products to load: %s", str(products))

        # If a list of files is provided to the load call, we disregard the
        # directory and filename specifications/definitions in the config file.

        if not prodfilenames:
            try:
                area_name = satscene.area_id or satscene.area.area_id
            except AttributeError:
                area_name = "satproj_?????_?????"

            # Make the list of files for the requested products:
            if isinstance(time_interval, (tuple, set, list)) and \
               len(time_interval) == 2:
                time_start, time_end = time_interval
            else:
                time_start, time_end = satscene.time_slot, None

            LOG.debug(
                "Start and end times: %s %s", str(time_start), str(time_end))
            prodfilenames = get_filenames(satscene, products, self._config,
                                          (time_start, time_end), area_name)

        LOG.debug("Product files: %s", str(prodfilenames))

        retv = self._determine_prod_and_geo_files(prodfilenames)
        prodfiles4product, geofiles4product = retv

        # Reading the products
        classes = {"CTTH": CloudTopTemperatureHeight,
                   "CT": CloudType,
                   "CMA": CloudMask,
                   "PC": PrecipitationClouds,
                   "CPP": CloudPhysicalProperties
                   }
        nodata_mask = False

        read_external_geo = {}
        for product in products:
            LOG.debug("Loading %s", product)

            if product not in prodfiles4product:
                LOG.warning("No files found for product: %s", product)
                continue

            pps_band = PPSProductData(prodfiles4product[product]).read()
            chn = classes[product]()
            chn.read(pps_band)

            if chn.name not in satscene:
                LOG.info("Adding new channel %s", chn.name)
                satscene.channels.append(chn)

            # Check if geolocation is loaded:
            if not chn.area:
                read_external_geo[product] = satscene.channels[-1].name

        # Check if some 'channel'/product needs geolocation. If some
        # product does not have geolocation, get it from the
        # geofilename:
        from pyresample import geometry

        # Load geolocation
        for chn_name in read_external_geo.values():
            LOG.debug("ch_name = %s", str(chn_name))
            chn = satscene[chn_name]
            geofilenames = geofiles4product[chn_name]
            LOG.debug("Geo-files = %s", str(geofilenames))
            geoloc = PpsGeolocationData(chn.shape,
                                        chn.granule_lengths,
                                        geofilenames).read()

            try:
                satscene[chn.name].area = geometry.SwathDefinition(
                    lons=geoloc.longitudes, lats=geoloc.latitudes)

                area_name = ("swath_" + satscene.fullname + "_" +
                             str(satscene.time_slot) + "_"
                             + str(chn.shape) + "_" +
                             chn.name)
                satscene[chn.name].area.area_id = area_name
                satscene[chn.name].area_id = area_name
            except ValueError:
                LOG.exception('Failed making a SwathDefinition: ' +
                              'min,max lons,lats = (%f,%f) (%f,%f)',
                              geoloc.longitudes.data.min(),
                              geoloc.longitudes.data.max(),
                              geoloc.latitudes.data.min(),
                              geoloc.latitudes.data.max())
                LOG.warning("No geolocation loaded for %s", str(chn_name))

        # PpsGeolocationData.clear_cache()

        return
Code example #13
class EventHandler(ProcessEvent):

    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """

    def __init__(self, topic, instrument, posttroll_port=0, filepattern=None,
                 aliases=None, tbus_orbit=False, history=0, granule_length=0):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit
        self.granule_length = granule_length
        self._deque = deque([], history)

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = {}

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_NOWRITE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info dict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                # Check if this file has been recently dealt with
                if event.pathname not in self._deque:
                    self._deque.append(event.pathname)
                    message = self.create_message()
                    LOGGER.info("Publishing message %s", str(message))
                    self.pub.send(str(message))
                else:
                    LOGGER.info("Data has been published recently, skipping.")
            self.__clean__()

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', self.info)

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        Message is sent, if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s",
                         self.file_parser.fmt, event.pathname)
            self.info = self.file_parser.parse(
                os.path.basename(event.pathname))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = {}
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                info = self.info.copy()
                for key in info:
                    if key in self.aliases:
                        self.info['orig_'+key] = self.info[key]
                        self.info[key] = self.aliases[key][str(self.info[key])]

            # add start_time and end_time if not present
            try:
                base_time = self.info["time"]
            except KeyError:
                try:
                    base_time = self.info["nominal_time"]
                except KeyError:
                    base_time = self.info["start_time"]
            if "start_time" not in self.info:
                self.info["start_time"] = base_time
            if "start_date" in self.info:
                self.info["start_time"] = \
                    dt.datetime.combine(self.info["start_date"].date(),
                                        self.info["start_time"].time())
                if "end_date" not in self.info:
                    self.info["end_date"] = self.info["start_date"]
                del self.info["start_date"]
            if "end_date" in self.info:
                self.info["end_time"] = \
                    dt.datetime.combine(self.info["end_date"].date(),
                                        self.info["end_time"].time())
                del self.info["end_date"]
            if "end_time" not in self.info and self.granule_length > 0:
                self.info["end_time"] = base_time + dt.timedelta(seconds=self.granule_length)

            if "end_time" in self.info:
                while self.info["start_time"] > self.info["end_time"]:
                    self.info["end_time"] += dt.timedelta(days=1)
Code example #14
class GeoGatherer(object):
    """Gatherer for geostationary satellite segments"""
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        services = ""
        if config.has_option(section, 'services'):
            services = config.get(section, 'services').split()
        self._listener = ListenerContainer(topics=topics, services=services)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = [
                    '%06d' % i for i in range(int(segments[0]),
                                              int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        if self._providing_server and self._providing_server != msg.host:
            return

        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({
            'uri': msg.data['uri'],
            'uid': msg.data['uid']
        })

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
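
_compose_filenames expands item strings such as ':PRO,:EPI' or 'VIS006:1-8' into concrete file names via Parser.compose. A minimal illustration of the compose step with an invented segment pattern:

import datetime as dt
from trollsift import Parser

# Hypothetical geostationary segment pattern, for illustration only.
parser = Parser("{channel_name}-{segment}-{nominal_time:%Y%m%d%H%M}")
meta = {"channel_name": "VIS006", "segment": "000001",
        "nominal_time": dt.datetime(2015, 8, 19, 12, 45)}
print(parser.compose(meta))   # -> "VIS006-000001-201508191245"
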
Code example #15
class GeoGatherer(object):

    """Gatherer for geostationary satellite segments"""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("geo_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=20)

        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

        self.logger = logging.getLogger("geo_gatherer")
        self._loop = False

    def _clear_data(self):
        """Clear data."""
        self._timeout = None
        self.metadata = {}
        self.received_files = set()
        self.wanted_files = set()
        self.all_files = set()
        self.critical_files = set()
        self.delayed_files = OrderedDict()

    def _init_data(self, msg):
        """Init wanted, all and critical files"""
        # Init metadata struct
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                self.metadata[key] = msg.data[key]
        self.metadata['dataset'] = []

        # Critical files that are required, otherwise production will fail
        self.critical_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "critical_files"))
        # These files are wanted, but not critical for production
        self.wanted_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "wanted_files"))
        self.all_files = \
            self._compose_filenames(self._config.get(self._section,
                                                     "all_files"))

    def _compose_filenames(self, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.metadata.copy()
        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%06d' % i for i in range(int(segments[0]),
                                                      int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self):
        """Publish file dataset and reinitialize gatherer."""

        # Diagnostic logging about delayed ...
        if len(self.delayed_files) > 0:
            file_str = ''
            for key in self.delayed_files:
                file_str += "%s %f seconds, " % (key, self.delayed_files[key])
            self.logger.warning("Files received late: %s", file_str.strip(', '))
        # and missing files
        missing_files = self.all_files.difference(self.received_files)
        if len(missing_files) > 0:
            self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", self.metadata)
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        self._clear_data()

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def collection_ready(self):
        """Determine if collection is ready to be published."""
        # If no files have been collected, return False
        if len(self.received_files) == 0:
            return False
        # If all wanted files have been received, return True
        if self.wanted_files.union(self.critical_files).issubset(
                self.received_files):
            return True
        # If all critical files have been received ...
        if self.critical_files.issubset(self.received_files):
            # and timeout is reached, return True
            if self._timeout is not None and \
               self._timeout <= dt.datetime.utcnow():
                return True
            # else, set timeout if not already running
            else:
                if self._timeout is None:
                    self._timeout = dt.datetime.utcnow() + self._timeliness
                    self.logger.info("Setting timeout to %s",
                                     str(self._timeout))
                return False

        # In other cases continue gathering
        return False

    def run(self):
        """Run GeoGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if collection is ready for publication
            if self.collection_ready():
                self._publish()

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        mda = self._parser.parse(msg.data["uid"])
        if msg.data['uid'] in self.received_files:
            return
        # Init metadata etc if this is the first file
        if len(self.metadata) == 0:
            self._init_data(msg)
        # If the nominal time of the new segment is later than the
        # current metadata has, ...
        elif mda["nominal_time"] > self.metadata["nominal_time"]:
            # timeout ...
            self._timeout = dt.datetime.utcnow()
            # and check if the collection is ready and publish
            if self.collection_ready():
                self._publish()
                self._clear_data()
                self._init_data(msg)
            # or discard data and start new collection
            else:
                self.logger.warning("Collection not finished before new "
                                    "started")
                missing_files = self.all_files.difference(self.received_files)
                self.logger.warning("Missing files: %s", missing_files)
                self._clear_data()
                self._init_data(msg)

        # Add uid and uri
        self.metadata['dataset'].append({'uri': msg.data['uri'],
                                         'uid': msg.data['uid']})

        # If critical files have been received but the collection is
        # not complete, add the file to list of delayed files
        if self.critical_files.issubset(self.received_files):
            delay = dt.datetime.utcnow() - (self._timeout - self._timeliness)
            self.delayed_files[msg.data['uid']] = delay.total_seconds()

        # Add to received files
        self.received_files.add(msg.data['uid'])
Code example #16
class SegmentGatherer(object):

    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()

        try:
            nameservers = config.get(section, 'nameserver')
            nameservers = nameservers.split()
        except (NoOptionError, ValueError):
            nameservers = []

        try:
            addresses = config.get(section, 'addresses')
            addresses = addresses.split()
        except (NoOptionError, ValueError):
            addresses = None

        try:
            publish_port = config.get(section, 'publish_port')
        except NoOptionError:
            publish_port = 0

        try:
            services = config.get(section, 'services').split()
        except (NoOptionError, ValueError):
            services = ""

        self._listener = ListenerContainer(topics=topics, addresses=addresses,
                                           services=services)
        self._publisher = publisher.NoisyPublisher("segment_gatherer",
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)
        try:
            self._time_tolerance = config.getint(section, "time_tolerance")
        except NoOptionError:
            self._time_tolerance = 30
        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, mda):
        """Init wanted, all and critical files"""
        # Init metadata struct
        metadata = mda.copy()
        metadata['dataset'] = []

        time_slot = str(metadata[self.time_name])
        self.logger.debug("Adding new slot: %s", time_slot)
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical to production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Name of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            meta = _copy_without_ignore_items(
                meta, ignored_keys=self._config.get(self._section,
                                                    'variable_tags').split(','))
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                format_string = '%d'
                if len(segments[0]) > 1 and segments[0][0] == '0':
                    format_string = '%0' + str(len(segments[0])) + 'd'
                segments = [format_string % i for i in range(int(segments[0]),
                                                             int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.globify(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""

        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        if missing_files_check:
            # and missing files
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        # Remove tags that are not necessary for datasets
        for tag in REMOVE_TAGS:
            try:
                del data['metadata'][tag]
            except KeyError:
                pass

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def update_timeout(self, slot):
        slot['timeout'] = dt.datetime.utcnow() + self._timeliness
        time_slot = str(slot['metadata'][self.time_name])
        self.logger.info("Setting timeout to %s for slot %s.",
                         str(slot['timeout']),
                         time_slot)

    def slot_ready(self, slot):
        """Determine if slot is ready to be published."""
        # If no files have been collected, return False
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = slot[
            'wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = len(
            wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, return True
        if wanted_and_critical_files.issubset(
                slot['received_files']):
            self.logger.info("All files received for slot %s.",
                             time_slot)
            return SLOT_READY

        if slot['timeout'] is None:
            self.update_timeout(slot)

        if slot['timeout'] < dt.datetime.utcnow():
            if slot['critical_files'].issubset(slot['received_files']):
                # All critical files have been received
                # Timeout reached, collection ready
                self.logger.info("Timeout occured, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
            else:
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occured and required files "
                                    "were not present, data discarded for "
                                    "slot %s.",
                                    time_slot)
                return SLOT_OBSOLETE_TIMEOUT

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run SegmentGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                if status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                if (self._providing_server and
                        self._providing_server != msg.host):
                    continue

                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return

        metadata = {}

        # Use values parsed from the filename as basis
        for key in mda:
            if key not in DO_NOT_COPY_KEYS:
                metadata[key] = mda[key]

        # Update with data given in the message
        for key in msg.data:
            if key not in DO_NOT_COPY_KEYS:
                metadata[key] = msg.data[key]

        time_slot = self._find_time_slot(metadata[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(metadata)
            slot = self.slots[time_slot]
            to_add = []
            for filename in slot['all_files']:
                if filename == msg.data['uid']:
                    continue
                url = urlparse(msg.data['uri'])
                path = os.path.join(os.path.dirname(url.path), filename)
                if not os.path.exists(path):
                    continue
                new_url = list(url)
                new_url[2] = path
                uri = urlunparse(new_url)

                slot['metadata']['dataset'].append({'uri': uri,
                                                    'uid': filename})
                to_add.append(filename)

            slot['received_files'].update(to_add)
            if to_add:
                self.logger.debug("Some files were already received %s",
                                  str(to_add))
                self.update_timeout(slot)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            mda = _copy_without_ignore_items(
                mda, ignored_keys=self._config.get(self._section,
                                                   'variable_tags').split(','))
        except NoOptionError:
            pass

        mask = self._parser.globify(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # Collect all sensors, not only the latest
        if type(msg.data["sensor"]) not in (tuple, list, set):
            msg.data["sensor"] = [msg.data["sensor"]]
        for sensor in msg.data["sensor"]:
            if "sensor" not in slot["metadata"]:
                slot["metadata"]["sensor"] = []
            if sensor not in slot["metadata"]["sensor"]:
                slot["metadata"]["sensor"].append(sensor)

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)

    def _find_time_slot(self, time_obj):
        """Find time slot and return the slot as a string.  If no slots are
        close enough, return *str(time_obj)*"""
        for slot in self.slots:
            time_slot = self.slots[slot]['metadata'][self.time_name]
            time_diff = time_obj - time_slot
            if abs(time_diff.total_seconds()) < self._time_tolerance:
                self.logger.debug("Found existing time slot, using that")
                return str(time_slot)

        return str(time_obj)
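
The `_find_time_slot` method above merges files whose nominal times differ by less than `self._time_tolerance` seconds into one slot. Below is a minimal, standalone sketch of that grouping rule; the ten-second tolerance and the simplified `{key: datetime}` slot mapping are illustrative assumptions, not values taken from the class.

import datetime as dt


def find_time_slot(slots, time_obj, tolerance=10):
    """Return an existing slot key within *tolerance* seconds, else a new key."""
    for key, slot_time in slots.items():
        if abs((time_obj - slot_time).total_seconds()) < tolerance:
            # Close enough to an existing slot, reuse its key
            return key
    # Nothing close enough, start a new slot keyed on the time itself
    return str(time_obj)


slots = {"2021-03-09 12:00:00": dt.datetime(2021, 3, 9, 12, 0, 0)}
print(find_time_slot(slots, dt.datetime(2021, 3, 9, 12, 0, 5)))  # reuses existing slot
print(find_time_slot(slots, dt.datetime(2021, 3, 9, 12, 5, 0)))  # returns a new slot key
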
コード例 #17
0
class EventHandler(ProcessEvent):
    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """
    def __init__(self,
                 topic,
                 instrument,
                 config_item,
                 posttroll_port=0,
                 filepattern=None,
                 aliases=None,
                 tbus_orbit=False,
                 history=0,
                 granule_length=0,
                 custom_vars=None,
                 nameservers=[],
                 watchManager=None):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker_" + config_item,
                                   posttroll_port,
                                   topic,
                                   nameservers=nameservers)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = OrderedDict()
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.custom_vars = custom_vars
        self.tbus_orbit = tbus_orbit
        self.granule_length = granule_length
        self._deque = deque([], history)
        self._watchManager = watchManager
        self._watched_dirs = dict()

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = OrderedDict()

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process_IN_DELETE(self, event):
        """On delete."""
        if (event.mask & pyinotify.IN_ISDIR):
            try:
                try:
                    self._watchManager.rm_watch(
                        self._watched_dirs[event.pathname], quiet=False)
                except pyinotify.WatchManagerError:
                    # As the directory is deleted before the watch is removed,
                    # pyinotify will emit an error message. This is OK, so just
                    # pass the exception.
                    LOGGER.debug("Removed watch: {}".format(event.pathname))
                    pass
                finally:
                    del self._watched_dirs[event.pathname]
            except KeyError:
                LOGGER.warning(
                    "Dir {} not watched by inotify. Can not delete watch.".
                    format(event.pathname))
        return

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info OrderedDict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                # Check if this file has been recently dealt with
                if event.pathname not in self._deque:
                    self._deque.append(event.pathname)
                    message = self.create_message()
                    LOGGER.info("Publishing message %s", str(message))
                    self.pub.send(str(message))
                else:
                    LOGGER.info("Data has been published recently, skipping.")
            self.__clean__()
        elif (event.mask & pyinotify.IN_ISDIR):
            tmask = (pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO
                     | pyinotify.IN_CREATE | pyinotify.IN_DELETE)
            try:
                self._watched_dirs.update(
                    self._watchManager.add_watch(event.pathname, tmask))
                LOGGER.debug("Added watch on dir: {}".format(event.pathname))
            except AttributeError:
                LOGGER.error(
                    "No watchmanager given. Can not add watch on {}".format(
                        event.pathname))
                pass

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', dict(self.info))

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        A message is sent if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s", self.file_parser.fmt,
                         event.pathname)
            pathname_join = os.path.basename(event.pathname)
            if 'origin_inotify_base_dir_skip_levels' in self.custom_vars:
                pathname_list = event.pathname.split('/')
                pathname_join = "/".join(pathname_list[int(
                    self.custom_vars['origin_inotify_base_dir_skip_levels']):])
            else:
                LOGGER.debug(
                    "No origin_inotify_base_dir_skip_levels in self.custom_vars"
                )

            self.info = OrderedDict()
            self.info.update(self.file_parser.parse(pathname_join))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = OrderedDict()
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                info = self.info.copy()
                for key in info:
                    if key in self.aliases:
                        self.info['orig_' + key] = self.info[key]
                        self.info[key] = self.aliases[key][str(self.info[key])]

            # add start_time and end_time if not present
            try:
                base_time = self.info["time"]
            except KeyError:
                try:
                    base_time = self.info["nominal_time"]
                except KeyError:
                    base_time = self.info["start_time"]
            if "start_time" not in self.info:
                self.info["start_time"] = base_time
            if "start_date" in self.info:
                self.info["start_time"] = \
                    dt.datetime.combine(self.info["start_date"].date(),
                                        self.info["start_time"].time())
                if "end_date" not in self.info:
                    self.info["end_date"] = self.info["start_date"]
                del self.info["start_date"]
            if "end_date" in self.info:
                self.info["end_time"] = \
                    dt.datetime.combine(self.info["end_date"].date(),
                                        self.info["end_time"].time())
                del self.info["end_date"]
            if "end_time" not in self.info and self.granule_length > 0:
                self.info["end_time"] = base_time + \
                    dt.timedelta(seconds=self.granule_length)

            if "end_time" in self.info:
                while self.info["start_time"] > self.info["end_time"]:
                    self.info["end_time"] += dt.timedelta(days=1)

            if self.custom_vars is not None:
                for var_name in self.custom_vars:
                    var_pattern = self.custom_vars[var_name]
                    var_val = None
                    if '%' in var_pattern:
                        var_val = helper_functions.create_aligned_datetime_var(
                            var_pattern, self.info)
                    if var_val is None:
                        var_val = compose(var_pattern, self.info)
                    self.info[var_name] = var_val
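
The custom variable handling at the end of `parse_file_info` falls back to `trollsift.compose`, which fills a pattern from the already parsed metadata. A small, hedged sketch of that call is shown below; the pattern and metadata values are made-up examples, not taken from any configuration.

import datetime as dt

from trollsift import compose

# Metadata as it might look after Parser.parse() has run on a filename
info = {"platform_name": "Metop-B",
        "start_time": dt.datetime(2021, 3, 9, 18, 2)}

# Each custom variable is itself a trollsift pattern composed from the info
var_pattern = "{platform_name}_{start_time:%Y%m%d_%H%M}"
print(compose(var_pattern, info))  # -> Metop-B_20210309_1802
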
コード例 #18
0
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""
    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(
                seconds=config.getint(section, "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, msg, mda):
        """Init wanted, all and critical files"""
        # Init metadata struct
        metadata = {}
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                metadata[key] = msg.data[key]
        metadata['dataset'] = []

        # Use also metadata parsed from the filenames
        metadata.update(mda)

        time_slot = str(metadata[self.time_name])
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                    self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical to production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Name of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with a
        # wildcard, as these can't be forecasted.
        try:
            for tag in self._config.get(self._section,
                                        'variable_tags').split(','):
                meta[tag] = '*'
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = [
                    '%d' % i for i in range(int(segments[0]),
                                            int(segments[-1]) + 1)
                ]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.compose(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""

        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        if missing_files_check:
            # and missing files
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def slot_ready(self, slot):
        """Determine if slot is ready to be published."""
        # If no files have been collected, return False
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = \
            slot['wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = \
            len(wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received, time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, return True
        if wanted_and_critical_files.issubset(slot['received_files']):
            self.logger.info("All files received for slot %s.", time_slot)
            return SLOT_READY

        if slot['critical_files'].issubset(slot['received_files']):
            # All critical files have been received
            if slot['timeout'] is None:
                # Set timeout
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection ready
                self.logger.info(
                    "Timeout occured, required files received "
                    "for slot %s.", time_slot)
                return SLOT_READY
            else:
                pass
        else:
            if slot['timeout'] is None:
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY

            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection is obsolete
                self.logger.warning(
                    "Timeout occured and required files "
                    "were not present, data discarded for "
                    "slot %s.", time_slot)
                return SLOT_OBSOLETE_TIMEOUT
            else:
                pass

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run SegmentGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                if status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return
        time_slot = str(mda[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(msg, mda)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with a
        # wildcard, as these can't be forecasted.
        try:
            for tag in self._config.get(self._section,
                                        'variable_tags').split(','):
                mda[tag] = '*'
        except NoOptionError:
            pass

        mask = self._parser.compose(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({
            'uri': msg.data['uri'],
            'uid': msg.data['uid']
        })

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
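
The constructor above pulls everything it needs from a ConfigParser section. A hedged sketch of a minimal in-memory configuration that would satisfy those `config.get`/`config.getint` calls follows; the section name, topics and filename pattern are invented placeholders, and the commented-out instantiation assumes a running posttroll setup.

try:
    from configparser import RawConfigParser   # Python 3
except ImportError:
    from ConfigParser import RawConfigParser   # Python 2

config = RawConfigParser()
section = "example_segments"
config.add_section(section)
config.set(section, "topics", "/example/topic")
config.set(section, "publish_topic", "/example/gathered")
config.set(section, "pattern",
           "{channel_name}_{segment}_{start_time:%Y%m%d%H%M}.nc")
config.set(section, "time_name", "start_time")
config.set(section, "timeliness", "1200")
config.set(section, "num_files_premature_publish", "-1")
config.set(section, "wanted_files", "VIS006:1-8")
config.set(section, "all_files", "VIS006:1-8")

# gatherer = SegmentGatherer(config, section)  # would also need posttroll running
# gatherer.run()
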
コード例 #19
0
class EventHandler(ProcessEvent):
    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """
    def __init__(self,
                 topic,
                 instrument,
                 posttroll_port=0,
                 filepattern=None,
                 aliases=None,
                 tbus_orbit=False,
                 history=0,
                 granule_length=0):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit
        self.granule_length = granule_length
        self._deque = deque([], history)

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = {}

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info dict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                # Check if this file has been recently dealt with
                if event.pathname not in self._deque:
                    self._deque.append(event.pathname)
                    message = self.create_message()
                    LOGGER.info("Publishing message %s", str(message))
                    self.pub.send(str(message))
                else:
                    LOGGER.info("Data has been published recently, skipping.")
            self.__clean__()

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', self.info)

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        A message is sent if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s", self.file_parser.fmt,
                         event.pathname)
            self.info = self.file_parser.parse(os.path.basename(
                event.pathname))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = {}
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                info = self.info.copy()
                for key in info:
                    if key in self.aliases:
                        self.info['orig_' + key] = self.info[key]
                        self.info[key] = self.aliases[key][str(self.info[key])]

            # add start_time and end_time if not present
            try:
                base_time = self.info["time"]
            except KeyError:
                try:
                    base_time = self.info["nominal_time"]
                except KeyError:
                    base_time = self.info["start_time"]
            if "start_time" not in self.info:
                self.info["start_time"] = base_time
            if "start_date" in self.info:
                self.info["start_time"] = \
                    dt.datetime.combine(self.info["start_date"].date(),
                                        self.info["start_time"].time())
                if "end_date" not in self.info:
                    self.info["end_date"] = self.info["start_date"]
                del self.info["start_date"]
            if "end_date" in self.info:
                self.info["end_time"] = \
                    dt.datetime.combine(self.info["end_date"].date(),
                                        self.info["end_time"].time())
                del self.info["end_date"]
            if "end_time" not in self.info and self.granule_length > 0:
                self.info["end_time"] = base_time + dt.timedelta(
                    seconds=self.granule_length)

            if "end_time" in self.info:
                while self.info["start_time"] > self.info["end_time"]:
                    self.info["end_time"] += dt.timedelta(days=1)
コード例 #20
0
def update_nwp(params):
    LOG.info("METNO update nwp")

    tempfile.tempdir = params['options']['nwp_outdir']

    ecmwf_path = params['options']['ecmwf_path']
    if not os.path.exists(ecmwf_path):
        ecmwf_path = ecmwf_path.replace("storeB", "storeA")
        LOG.warning(
            "Need to replace storeB with storeA for ecmwf_path: {}".format(
                str(ecmwf_path)))

    filelist = glob(
        os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*"))

    if len(filelist) == 0:
        LOG.info("Found no input files! dir = " + str(
            os.path.join(ecmwf_path, params['options']['ecmwf_prefix'] + "*")))
        return

    from trollsift import Parser, compose
    filelist.sort()
    for filename in filelist:
        if params['options']['ecmwf_file_name_sift'] is not None:
            try:
                parser = Parser(params['options']['ecmwf_file_name_sift'])
            except NoOptionError as noe:
                LOG.error("NoOptionError {}".format(noe))
                continue
            if not parser.validate(os.path.basename(filename)):
                LOG.error(
                    "Parser validate on filename: {} failed.".format(filename))
                continue
            res = parser.parse("{}".format(os.path.basename(filename)))

            time_now = datetime.utcnow()
            if 'analysis_time' in res:
                if res['analysis_time'].year == 1900:
                    # This is tricky: the filename is missing the year.
                    # Guess it from the current year and month combined with
                    # the month of the analysis time parsed from the filename.
                    # If the current month is 1 (January) and the analysis
                    # month is 12, New Year has just passed, so the NWP
                    # analysis time belongs to the previous year.
                    if time_now.month == 1 and res['analysis_time'].month == 12:
                        analysis_year = time_now.year - 1
                    else:
                        analysis_year = time_now.year

                    res['analysis_time'] = res['analysis_time'].replace(
                        year=analysis_year)
            else:
                LOG.error(
                    "Can not parse analysis_time in file name. Check config and filename timestamp"
                )

            if 'forecast_time' in res:
                if res['forecast_time'].year == 1900:
                    # See above for explanation
                    if res['analysis_time'].month == 12 and res[
                            'forecast_time'].month == 1:
                        forecast_year = res['analysis_time'].year + 1
                    else:
                        forecast_year = res['analysis_time'].year

                    res['forecast_time'] = res['forecast_time'].replace(
                        year=forecast_year)
            else:
                LOG.error(
                    "Can not parse forecast_time in file name. Check config and filename timestamp"
                )

            forecast_time = res['forecast_time']
            analysis_time = res['analysis_time']
            step_delta = forecast_time - analysis_time
            step = "{:03d}H{:02d}M".format(
                int(step_delta.days * 24 + step_delta.seconds / 3600), 0)
        else:
            LOG.error("Not sift pattern given. Can not parse input NWP files")

        if analysis_time < params['starttime']:
            # LOG.debug("skip analysis time {} older than search time {}".format(analysis_time, params['starttime']))
            continue

        if int(step[:3]) not in params['nlengths']:
            # LOG.debug("Skip step {}, not in {}".format(int(step[:3]), params['nlengths']))
            continue

        output_parameters = {}
        output_parameters['analysis_time'] = analysis_time
        output_parameters['step_hour'] = int(step_delta.days * 24 +
                                             step_delta.seconds / 3600)
        output_parameters['step_min'] = 0
        try:
            if not os.path.exists(params['options']['nwp_outdir']):
                os.makedirs(params['options']['nwp_outdir'])
        except OSError as e:
            LOG.error("Failed to create directory: %s", e)
        result_file = ""
        try:
            result_file = os.path.join(
                params['options']['nwp_outdir'],
                compose(params['options']['nwp_output'], output_parameters))
            _result_file = os.path.join(
                params['options']['nwp_outdir'],
                compose("." + params['options']['nwp_output'],
                        output_parameters))
            _result_file_lock = os.path.join(
                params['options']['nwp_outdir'],
                compose("." + params['options']['nwp_output'] + ".lock",
                        output_parameters))
        except Exception as e:
            LOG.error(
                "Joining outdir with output for nwp failed with: {}".format(e))

        LOG.info("Result file: {}".format(result_file))
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        import fcntl
        import errno
        import time
        rfl = open(_result_file_lock, 'w+')
        # do some locking
        while True:
            try:
                fcntl.flock(rfl, fcntl.LOCK_EX | fcntl.LOCK_NB)
                LOG.debug("1Got lock for NWP outfile: {}".format(result_file))
                break
            except IOError as e:
                if e.errno != errno.EAGAIN:
                    raise
                else:
                    LOG.debug("Waiting for lock ... {}".format(result_file))
                    time.sleep(1)

        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            # Need to release the lock
            fcntl.flock(rfl, fcntl.LOCK_UN)
            rfl.close()
            continue

        fout = open(_result_file, 'wb')
        try:

            # Do the static fields
            # Note: the static fields are read from a separately configured
            # file, not from the file in the *filename* variable
            static_filename = params['options']['ecmwf_static_surface']
            if not os.path.exists(static_filename):
                static_filename = static_filename.replace("storeB", "storeA")
                LOG.warning("Need to replace storeB with storeA")

            index_vals = []
            index_keys = ['paramId', 'level']
            LOG.debug("Start building index")
            LOG.debug("Handeling file: %s", filename)
            iid = ecc.codes_index_new_from_file(filename, index_keys)
            filename_n1s = filename.replace('N2D', 'N1S')
            LOG.debug("Add to index %s", filename_n1s)
            ecc.codes_index_add_file(iid, filename_n1s)
            LOG.debug("Add to index %s", static_filename)
            ecc.codes_index_add_file(iid, static_filename)
            LOG.debug("Done index")
            for key in index_keys:
                key_vals = ecc.codes_index_get(iid, key)
                key_vals = tuple(x for x in key_vals if x != 'undef')
                index_vals.append(key_vals)

            for prod in product(*index_vals):
                for i in range(len(index_keys)):
                    ecc.codes_index_select(iid, index_keys[i], prod[i])

                while 1:
                    gid = ecc.codes_new_from_index(iid)
                    if gid is None:
                        break

                    param = ecc.codes_get(gid, index_keys[0])
                    parameters = [
                        172, 129, 235, 167, 168, 137, 130, 131, 132, 133, 134,
                        157
                    ]
                    if param in parameters:
                        LOG.debug("Doing param: %d", param)
                        copy_needed_field(gid, fout)

                    ecc.codes_release(gid)
            ecc.codes_index_release(iid)

            fout.close()
            os.rename(_result_file, result_file)

        except WrongLengthError as wle:
            LOG.error("Something wrong with the data: %s", wle)
            raise

        # In the end release the lock
        fcntl.flock(rfl, fcntl.LOCK_UN)
        rfl.close()
        os.remove(_result_file_lock)
    return
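
The year-guessing around the 1900 placeholder year above is the trickiest part of the filename handling. Below is a hedged, standalone version of just that rule, with invented dates; it is a sketch of the idea, not the function's actual interface.

import datetime as dt


def guess_analysis_year(analysis_time, now):
    """Fill in the missing year of *analysis_time* (parsed as 1900) from *now*."""
    if analysis_time.year != 1900:
        return analysis_time
    if now.month == 1 and analysis_time.month == 12:
        # New Year has passed but the analysis is from December of last year
        return analysis_time.replace(year=now.year - 1)
    return analysis_time.replace(year=now.year)


now = dt.datetime(2021, 1, 2, 6, 0)
print(guess_analysis_year(dt.datetime(1900, 12, 31, 18, 0), now))  # -> 2020-12-31 18:00:00
print(guess_analysis_year(dt.datetime(1900, 1, 2, 0, 0), now))     # -> 2021-01-02 00:00:00
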
コード例 #21
0
class SegmentGatherer(object):

    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()

        self.time_name = config.get(section, 'time_name')

        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, msg, mda):
        """Init wanted, all and critical files"""
        # Init metadata struct
        metadata = {}
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                metadata[key] = msg.data[key]
        metadata['dataset'] = []

        # Use also metadata parsed from the filenames
        metadata.update(mda)

        time_slot = str(metadata[self.time_name])
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, empty set([]) is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                    self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical to production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "wanted_files"))
        # Name of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(time_slot,
                                    self._config.get(self._section,
                                                     "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename set()s based on a pattern and item string.
        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'"""

        # Empty set
        result = set()

        # Get copy of metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            meta = _copy_without_ignore_items(
                meta, ignored_keys=self._config.get(self._section,
                                                    'variable_tags').split(','))
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%d' % i for i in range(int(segments[0]),
                                                    int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.globify(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish file dataset and reinitialize gatherer."""

        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s", file_str.strip(', '))

        if missing_files_check:
            # and missing files
            missing_files = data['all_files'].difference(data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s", ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set logger."""
        self.logger = logger

    def slot_ready(self, slot):
        """Determine if slot is ready to be published."""
        # If no files have been collected, return False
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = \
            slot['wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = \
            len(wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, return True
        if wanted_and_critical_files.issubset(
                slot['received_files']):
            self.logger.info("All files received for slot %s.",
                             time_slot)
            return SLOT_READY

        if slot['critical_files'].issubset(slot['received_files']):
            # All critical files have been received
            if slot['timeout'] is None:
                # Set timeout
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']),
                                 time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection ready
                self.logger.info("Timeout occured, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
            else:
                pass
        else:
            if slot['timeout'] is None:
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s",
                                 str(slot['timeout']),
                                 time_slot)
                return SLOT_NOT_READY

            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occured and required files "
                                    "were not present, data discarded for "
                                    "slot %s.",
                                    time_slot)
                return SLOT_OBSOLETE_TIMEOUT
            else:
                pass

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run SegmentGatherer"""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                if status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process message"""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return
        time_slot = str(mda[self.time_name])

        # Init metadata etc if this is the first file
        if time_slot not in self.slots:
            self._init_data(msg, mda)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            mda = _copy_without_ignore_items(
                mda, ignored_keys=self._config.get(self._section,
                                                   'variable_tags').split(','))
        except NoOptionError:
            pass

        mask = self._parser.globify(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # If critical files have been received but the slot is
        # not complete, add the file to list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
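
The gatherer above keys `received_files` on `self._parser.globify(mda)` rather than on the raw filename, so fields listed in `variable_tags` (for example a processing time) do not defeat duplicate detection. A small, hedged illustration with an invented pattern and filename follows; the exact wildcard trollsift produces for the dropped field may differ, but the mask no longer depends on it.

from trollsift import Parser

p = Parser("{channel}_{start_time:%Y%m%d%H%M}_{processing_time:%Y%m%d%H%M%S}.nc")
mda = p.parse("VIS006_202103091200_20210309120512.nc")

# Drop the variable tag: the processing time cannot be forecast
mda.pop("processing_time")

# globify() turns the now-missing field into a glob wildcard,
# e.g. "VIS006_202103091200_*.nc", so reprocessed copies map to the same mask
print(p.globify(mda))
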
コード例 #22
0
    # IASI_PW3_02_M01_20160309180258Z_20160309180554Z_N_O_20160309184345Z.h5
    pattern = 'IASI_PW3_02_{platform_name:3s}_{start_time:%Y%m%d%H%M%S}Z_{end_time:%Y%m%d%H%M%S}Z_N_O_{creation_time:%Y%m%d%H%M%S}Z.h5'
    p__ = Parser(pattern)

    PREFIXES = [
        'IASI_PW3_',
        'IASI_PW3_',
    ]
    ftp = FTP(HOST)
    print("connecting to %s" % HOST)
    ftp.login(USER, PASSWD)
    tempfile.tempdir = outpath
    for (remotedir, prefix) in zip(REMOTE_DIRS, PREFIXES):
        remotefiles = ftp.nlst(remotedir)
        fnames = [os.path.basename(f) for f in remotefiles]
        dates_remote = [p__.parse(s)['start_time'] for s in fnames]

        rfarr = np.array(remotefiles)
        drarr = np.array(dates_remote)
        remotefiles = rfarr[drarr > starttime].tolist()
        remotefiles = [
            r for r in remotefiles
            if (r.endswith('.h5') and os.path.basename(r).startswith(prefix))
        ]

        localfiles = [
            os.path.join(outpath, os.path.basename(f.strip('.h5')))
            for f in remotefiles
        ]

        try:
コード例 #23
0
ファイル: trollstalker.py プロジェクト: ch-k/trollduction
class EventHandler(ProcessEvent):

    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """

    def __init__(self, topic, instrument, posttroll_port=0, filepattern=None,
                 aliases=None, tbus_orbit=False):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = {}

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info dict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                message = self.create_message()
                LOGGER.info("Publishing message %s" % str(message))
                self.pub.send(str(message))
            self.__clean__()

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', self.info)

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        A message is sent if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s",
                         self.file_parser.fmt, event.pathname)
            self.info = self.file_parser.parse(
                os.path.basename(event.pathname))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = {}
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                for key in self.info:
                    if key in self.aliases:
                        self.info[key] = self.aliases[key][str(self.info[key])]
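
The alias handling at the end of `parse_file_info` above maps a parsed value to the value that should be published. A tiny sketch of that substitution with a made-up alias table:

# aliases maps a metadata key to a {parsed value -> published value} table
info = {"platform_name": "noaa19", "orbit_number": 33643}
aliases = {"platform_name": {"noaa19": "NOAA-19"}}

for key in list(info):
    if key in aliases:
        info[key] = aliases[key][str(info[key])]

print(info)  # -> {'platform_name': 'NOAA-19', 'orbit_number': 33643}
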
コード例 #24
0
def update_nwp(params):
    LOG.info("METNO update nwp")

    result_files = dict()
    tempfile.tempdir = params['options']['nwp_outdir']

    ecmwf_path = params['options']['ecmwf_path']
    if not os.path.exists(ecmwf_path):
        ecmwf_path = ecmwf_path.replace("storeB","storeA")
        LOG.warning("Need to replace storeB with storeA for ecmwf_path: {}".format(str(ecmwf_path)))

    filelist = glob(os.path.join(ecmwf_path,  params['options']['ecmwf_prefix'] + "*"))

    if len(filelist) == 0:
        LOG.info("Found no input files! dir = " + str(os.path.join(ecmwf_path,  params['options']['ecmwf_prefix'] + "*")))
        return

    from trollsift import Parser, compose
    filelist.sort()
    for filename in filelist:
        if params['options']['ecmwf_file_name_sift'] is not None:
            try:
                parser = Parser(params['options']['ecmwf_file_name_sift'])
            except NoOptionError as noe:
                LOG.error("NoOptionError {}".format(noe)) 
                continue
            if not parser.validate(os.path.basename(filename)):
                LOG.error("Parser validate on filename: {} failed.".format(filename))
                continue
            res = parser.parse("{}".format(os.path.basename(filename)))

            # This takes too long to complete.
            # if filename not in file_cache:
            #     cmd="grib_get -w count=1 -p dataDate {}".format(filename)
            #     run_shell_command(cmd, stdout_logfile='/tmp/dataDate')
            #     dataDate = open("/tmp/dataDate", 'r')
            #     dataDate_input = dataDate.read()
            #     dataDate.close()
            #     for dd in dataDate_input.splitlines():
            #         try:
            #             _dataDate = datetime.strptime(dd, "%Y%m%d")
            #         except Exception as e:
            #         LOG.error("Failed with :{}".format(e))
                    
            #     print "Data date is: {}".format(_dataDate)
            #     _file_cache[filename] = _dataDate
            #     file_cache.append(_file_cache)
            # else:
            #     print "already got datetime"

            time_now = datetime.utcnow()
            if 'analysis_time' in res:
                if res['analysis_time'].year == 1900:
                    # This is tricky. The filename is missing the year, so we
                    # need to guess it from a combination of the current year
                    # and month and the month of the analysis time taken from
                    # the filename. If the current month is 1 (January) and the
                    # analysis month is 12, New Year has passed but the NWP
                    # analysis time belongs to the previous year.
                    if time_now.month == 1 and res['analysis_time'].month == 12:
                        analysis_year = time_now.year-1
                    else:
                        analysis_year = time_now.year

                    res['analysis_time'] = res['analysis_time'].replace(year=analysis_year)
            else:
                LOG.error("Can not parse analysis_time in file name. Check config and filename timestamp")

            if 'forecast_time' in res:
                if res['forecast_time'].year == 1900:
                    #See above for explanation
                    if res['analysis_time'].month == 12 and res['forecast_time'].month == 1:
                        forecast_year = res['analysis_time'].year+1
                    else:
                        forecast_year = res['analysis_time'].year
                        
                    res['forecast_time'] = res['forecast_time'].replace(year=forecast_year)
            else:
                LOG.error("Can not parse forecast_time in file name. Check config and filename timestamp")

            forecast_time = res['forecast_time']
            analysis_time = res['analysis_time']
            timestamp = analysis_time.strftime("%Y%m%d%H%M")
            step_delta = forecast_time - analysis_time
            step = "{:03d}H{:02d}M".format(int(step_delta.days*24 + step_delta.seconds/3600),0)
            timeinfo = "{:s}{:s}{:s}".format(analysis_time.strftime("%m%d%H%M"), forecast_time.strftime("%m%d%H%M"), res['end'])
        else:
            LOG.error("No sift pattern given. Cannot parse input NWP files")
            continue


        if analysis_time < params['starttime']:
            #LOG.debug("skip analysis time {} older than search time {}".format(analysis_time, params['starttime']))
            continue

        if int(step[:3]) not in params['nlengths']:
            #LOG.debug("Skip step {}, not in {}".format(int(step[:3]), params['nlengths']))
            continue

        output_parameters = {}
        output_parameters['analysis_time'] = analysis_time
        output_parameters['step_hour'] = int(step_delta.days*24 + step_delta.seconds/3600)
        output_parameters['step_min'] = 0
        try:
            if not os.path.exists(params['options']['nwp_outdir']):
                os.makedirs(params['options']['nwp_outdir'])
        except OSError as e:
            LOG.error("Failed to create directory: %s", e)
        result_file = ""
        try:
            result_file = os.path.join(params['options']['nwp_outdir'], compose(params['options']['nwp_output'],output_parameters))
            _result_file = os.path.join(params['options']['nwp_outdir'], compose("."+params['options']['nwp_output'],output_parameters))
            _result_file_lock = os.path.join(params['options']['nwp_outdir'], compose("."+params['options']['nwp_output']+".lock",output_parameters))
        except Exception as e:
            LOG.error("Joining outdir with output for nwp failed with: {}".format(e))

        LOG.info("Result file: {}".format(result_file))
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        import fcntl
        import errno
        import time
        rfl = open(_result_file_lock, 'w+')
        # do some locking
        while True:
            try:
                fcntl.flock(rfl, fcntl.LOCK_EX|fcntl.LOCK_NB)
                LOG.debug("1Got lock for NWP outfile: {}".format(result_file))
                break
            except IOError as e:
                if e.errno != errno.EAGAIN:
                    raise
                else:
                    LOG.debug("Waiting for lock ... {}".format(result_file))
                    time.sleep(1)
            
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            #Need to release the lock
            fcntl.flock(rfl, fcntl.LOCK_UN)
            rfl.close()
            continue

        #Need to set up temporary file to copy grib fields to
        #If ram is available through /run/shm, use this, else use /tmp
        if os.path.exists("/run/shm"):
            __tmpfile = "/run/shm/__tmp"
        else:
            __tmpfile = "/tmp/__tmp"

        #mgid = codes_grib_multi_new()
        #codes_grib_multi_support_on()

        # Some parameters can be found by the first name, and some by paramId.
        # Need to check the second if the first one is not found.
        parameter_name_list = ["indicatorOfParameter", "paramId"]

        fout = open(_result_file, 'wb')
        try:

            #Do the static fields
            #Note: field not in the filename variable, but a configured filename for static fields
            static_filename = params['options']['ecmwf_static_surface']
            #print("Handeling static file: %s", static_filename)
            if not os.path.exists(static_filename):
                static_filename = static_filename.replace("storeB","storeA")
                LOG.warning("Need to replace storeB with storeA")

            index_vals = []
            index_keys = ['paramId', 'level']
            LOG.debug("Start building index")
            LOG.debug("Handeling file: %s", filename)
            iid = codes_index_new_from_file(filename, index_keys)
            filename_n1s = filename.replace('N2D','N1S')
            LOG.debug("Add to index %s", filename_n1s)
            codes_index_add_file(iid, filename_n1s)
            LOG.debug("Add to index %s", static_filename)
            codes_index_add_file(iid, static_filename)
            LOG.debug("Done index")
            for key in index_keys:
                #print("size: ", key, codes_index_get_size(iid, key))
                key_vals = codes_index_get(iid, key)
                key_vals = tuple(x for x in key_vals if x != 'undef')
                #print(key_vals)
                #print(" ".join(key_vals))
                index_vals.append(key_vals)

            for prod in product(*index_vals):
                #print('All products: ', end='')
                for i in range(len(index_keys)):
                    #print('Range:', index_keys[i], prod[i])
                    #print("{} {}, ".format(index_keys[i], prod[i]), end='')
                    codes_index_select(iid, index_keys[i], prod[i])
                #print()
                while True:
                    gid = codes_new_from_index(iid)
                    if gid is None:
                        break
                    #print(" ".join(["%s=%s" % (key, codes_get(gid, key))
                    #                for key in index_keys]))
                    param = codes_get(gid, index_keys[0])
                    #print("Doing param:",param)
                    parameters = [172, 129, 235, 167, 168, 137, 130, 131, 132, 133, 134, 157]
                    if param in parameters:
                        LOG.debug("Doing param: %d",param)
                        #copy_needed_field(gid, mgid)
                        copy_needed_field(gid, fout)

                    codes_release(gid)
            codes_index_release(iid)

            #fout = open(_result_file, 'wb')
            #codes_grib_multi_write(mgid, fout)
            #codes_grib_multi_release(mgid)
 
            fout.close()
            os.rename(_result_file, result_file)
        except WrongLengthError as wle:
            LOG.error("Something wrong with the data: %s", wle)
            raise

        #In the end release the lock
        fcntl.flock(rfl, fcntl.LOCK_UN)
        rfl.close()
        os.remove(_result_file_lock)
    return
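
The flock dance above (take a non-blocking exclusive lock, otherwise sleep and retry) is a general pattern for guarding an output file against concurrent writers. Below is a stripped-down sketch of just that pattern, using only the standard library; the lock-file path in the usage comment is an arbitrary placeholder.

import errno
import fcntl
import time

def acquire_lock(lock_path, poll_interval=1.0):
    """Return an open, exclusively flock()ed file; wait until the lock is free."""
    lockfile = open(lock_path, "w+")
    while True:
        try:
            fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
            return lockfile
        except IOError as err:
            if err.errno != errno.EAGAIN:
                raise
            time.sleep(poll_interval)

# Usage (paths are placeholders):
# lock = acquire_lock("/tmp/nwp_output.lock")
# try:
#     ...  # write the output file
# finally:
#     fcntl.flock(lock, fcntl.LOCK_UN)
#     lock.close()
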
コード例 #25
0
class EventHandler(ProcessEvent):
    """
    Event handler class for inotify.
     *topic* - topic of the published messages
     *posttroll_port* - port number to publish the messages on
     *filepattern* - filepattern for finding information from the filename
    """
    def __init__(self,
                 topic,
                 instrument,
                 posttroll_port=0,
                 filepattern=None,
                 aliases=None,
                 tbus_orbit=False):
        super(EventHandler, self).__init__()

        self._pub = NoisyPublisher("trollstalker", posttroll_port, topic)
        self.pub = self._pub.start()
        self.topic = topic
        self.info = {}
        if filepattern is None:
            filepattern = '{filename}'
        self.file_parser = Parser(filepattern)
        self.instrument = instrument
        self.aliases = aliases
        self.tbus_orbit = tbus_orbit

    def stop(self):
        '''Stop publisher.
        '''
        self._pub.stop()

    def __clean__(self):
        '''Clean instance attributes.
        '''
        self.info = {}

    def process_IN_CLOSE_WRITE(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CLOSE_WRITE")
        self.process(event)

    def process_IN_CLOSE_NOWRITE(self, event):
        """When a nonwritable file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_MOVED_TO(self, event):
        """When a file is closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MOVED_TO")
        self.process(event)

    def process_IN_CREATE(self, event):
        """When a file is created, process the associated event.
        """
        LOGGER.debug("trigger: IN_CREATE")
        self.process(event)

    def process_IN_CLOSE_MODIFY(self, event):
        """When a file is modified and closed, process the associated event.
        """
        LOGGER.debug("trigger: IN_MODIFY")
        self.process(event)

    def process(self, event):
        '''Process the event'''
        # New file created and closed
        if not event.dir:
            LOGGER.debug("processing %s", event.pathname)
            # parse information and create self.info dict{}
            self.parse_file_info(event)
            if len(self.info) > 0:
                message = self.create_message()
                LOGGER.info("Publishing message %s" % str(message))
                self.pub.send(str(message))
            self.__clean__()

    def create_message(self):
        """Create broadcasted message
        """
        return Message(self.topic, 'file', self.info)

    def parse_file_info(self, event):
        '''Parse satellite and orbit information from the filename.
        Message is sent, if a matching filepattern is found.
        '''
        try:
            LOGGER.debug("filter: %s\t event: %s", self.file_parser.fmt,
                         event.pathname)
            self.info = self.file_parser.parse(os.path.basename(
                event.pathname))
            LOGGER.debug("Extracted: %s", str(self.info))
        except ValueError:
            # Filename didn't match pattern, so empty the info dict
            LOGGER.info("Couldn't extract any usefull information")
            self.info = {}
        else:
            self.info['uri'] = event.pathname
            self.info['uid'] = os.path.basename(event.pathname)
            self.info['sensor'] = self.instrument.split(',')
            LOGGER.debug("self.info['sensor']: " + str(self.info['sensor']))

            if self.tbus_orbit and "orbit_number" in self.info:
                LOGGER.info("Changing orbit number by -1!")
                self.info["orbit_number"] -= 1

            # replace values with corresponding aliases, if any are given
            if self.aliases:
                for key in self.info:
                    if key in self.aliases:
                        self.info[key] = self.aliases[key][str(self.info[key])]
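
For context, a hedged sketch of how an EventHandler like the one above might be wired into pyinotify. The watched directory, topic, instrument and file pattern are placeholders, not taken from any real trollstalker configuration.

import pyinotify

# Watch for the events the handler defines process_* methods for.
mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO | pyinotify.IN_CREATE

handler = EventHandler("/my/file/topic", "avhrr/3",
                       posttroll_port=0,
                       filepattern="{platform_name:s}_{start_time:%Y%m%d%H%M}.hdf")

watch_manager = pyinotify.WatchManager()
watch_manager.add_watch("/data/incoming", mask, rec=True)

notifier = pyinotify.Notifier(watch_manager, handler)
try:
    notifier.loop()  # blocks, dispatching inotify events to the handler
finally:
    handler.stop()
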
コード例 #26
0
def update_nwp(starttime, nlengths):
    """Prepare NWP grib files for PPS.

    Consider only analysis times newer than *starttime*, and only the forecast
    lead times (in hours) given in the list of integers *nlengths*.
    """

    LOG.info("Path to prepare_nwp config file = %s", str(CONFIG_PATH))
    LOG.info("Prepare_nwp config file = %s", str(CONFIG_FILE))
    LOG.info("Path to nhsf files: %s", str(nhsf_path))
    LOG.info("Path to nhsp files: %s", str(nhsp_path))

    tempfile.tempdir = nwp_outdir
    filelist = glob(os.path.join(nhsf_path, nhsf_prefix + "*"))
    if len(filelist) == 0:
        LOG.info("No input files! dir = %s", str(nhsf_path))
        return

    LOG.debug('NHSF NWP files found = %s', str(filelist))
    nfiles_error = 0
    for filename in filelist:
        if nhsf_file_name_sift is None:
            raise NwpPrepareError()

        try:
            parser = Parser(nhsf_file_name_sift)
        except NoOptionError as noe:
            LOG.error("NoOptionError {}".format(noe))
            continue
        if not parser.validate(os.path.basename(filename)):
            LOG.error("Parser validate on filename: {} failed.".format(filename))
            continue
        LOG.info("{}".format(os.path.basename(filename)))
        res = parser.parse("{}".format(os.path.basename(filename)))
        LOG.info("{}".format(res))
        if 'analysis_time' in res:
            if res['analysis_time'].year == 1900:
                res['analysis_time'] = res['analysis_time'].replace(year=datetime.utcnow().year)

            analysis_time = res['analysis_time']
            timestamp = analysis_time.strftime("%Y%m%d%H%M")
        else:
            raise NwpPrepareError("Can not parse analysis_time in file name. Check config and filename timestamp")

        if 'forecast_time' in res:
            if res['forecast_time'].year == 1900:
                res['forecast_time'] = res['forecast_time'].replace(year=datetime.utcnow().year)
            forecast_time = res['forecast_time']
            forecast_step = forecast_time - analysis_time
            forecast_step = "{:03d}H{:02d}M".format(forecast_step.days*24 + forecast_step.seconds/3600, 0)
            timeinfo = "{:s}{:s}{:s}".format(analysis_time.strftime(
                "%m%d%H%M"), forecast_time.strftime("%m%d%H%M"), res['end'])
        else:
            LOG.info("Can not parse forecast_time in file name. Try forecast step...")
            # This needs to be done more solid using the sift pattern! FIXME!
            timeinfo = filename.rsplit("_", 1)[-1]
            # Forecast step in hours:
            if 'forecast_step' in res:
                forecast_step = res['forecast_step']
            else:
                raise NwpPrepareError(
                    'Failed parsing forecast_step in file name. Check config and filename timestamp.')

        LOG.debug("Analysis time and start time: %s %s", str(analysis_time), str(starttime))
        if analysis_time < starttime:
            continue
        if forecast_step not in nlengths:
            LOG.debug("Skip step. Forecast step and nlengths: %s %s", str(forecast_step), str(nlengths))
            continue

        LOG.info("timestamp, step: %s %s", str(timestamp), str(forecast_step))
        result_file = os.path.join(
            nwp_outdir, nwp_output_prefix + timestamp + "+" + '%.3dH00M' % forecast_step)
        if os.path.exists(result_file):
            LOG.info("File: " + str(result_file) + " already there...")
            continue

        tmp_filename = make_temp_filename(suffix="_" + timestamp + "+" +
                                          '%.3dH00M' % forecast_step, dir=nwp_outdir)

        LOG.info("result and tmp files: " + str(result_file) + " " + str(tmp_filename))
        nhsp_file = os.path.join(nhsp_path, nhsp_prefix + timeinfo)
        if not os.path.exists(nhsp_file):
            LOG.warning("Corresponding nhsp-file not there: " + str(nhsp_file))
            continue

        cmd = ("grib_copy -w gridType=regular_ll " + nhsp_file + " " + tmp_filename)
        retv = run_command(cmd)
        LOG.debug("Returncode = " + str(retv))
        if retv != 0:
            LOG.error(
                "Failed doing the grib-copy! Will continue with the next file")
            nfiles_error = nfiles_error + 1
            if nfiles_error > len(filelist) / 2:
                LOG.error(
                    "More than half of the Grib files failed upon grib_copy!")
                raise IOError('Failed running grib_copy on many Grib files')

        if not os.path.exists(nwp_lsmz_filename):
            LOG.error("No static grib file with land-sea mask and " +
                      "topography available. Can't prepare NWP data")
            raise IOError('Failed getting static land-sea mask and topography')

        tmp_result_filename = make_temp_filename()
        cmd = ('cat ' + tmp_filename + " " +
               os.path.join(nhsf_path, nhsf_prefix + timeinfo) +
               " " + nwp_lsmz_filename + " > " + tmp_result_filename)
        LOG.debug("Add topography and land-sea mask to data:")
        LOG.debug("Command = " + str(cmd))
        _start = time.time()
        retv = os.system(cmd)
        _end = time.time()
        LOG.debug("os.system call took: %f seconds", _end - _start)
        LOG.debug("Returncode = " + str(retv))
        if retv != 0:
            LOG.warning("Failed generating nwp file %s ...", result_file)
            if os.path.exists(tmp_result_filename):
                os.remove(tmp_result_filename)
            raise IOError("Failed adding topography and land-sea " +
                          "mask data to grib file")

        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        else:
            LOG.warning("tmp file %s gone! Cannot clean it...", tmp_filename)

        if check_nwp_content(tmp_result_filename):
            LOG.info('NWP file content check passed: %s', result_file)
            _start = time.time()
            os.rename(tmp_result_filename, result_file)
            _end = time.time()
            LOG.debug("Rename file %s to %s: This took %f seconds",
                      tmp_result_filename, result_file, _end - _start)
        else:
            LOG.warning("Missing important fields. No nwp file %s written to disk",
                        result_file)
            if os.path.exists(tmp_result_filename):
                os.remove(tmp_result_filename)

    return
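
The year == 1900 handling above exists because the file-name timestamps carry no year, so the parser defaults to 1900. Below is a small self-contained sketch of that inference (including the New Year rollover guard used in the earlier update_nwp variant, code example #24) together with the forecast-step arithmetic; the datetimes are invented.

from datetime import datetime

def guess_year(parsed_time, now):
    """Fill in the missing year, handling a December analysis read in January."""
    if parsed_time.year != 1900:
        return parsed_time
    year = now.year - 1 if (now.month == 1 and parsed_time.month == 12) else now.year
    return parsed_time.replace(year=year)

now = datetime(2021, 1, 2, 3, 0)
analysis_time = guess_year(datetime(1900, 12, 31, 12, 0), now)  # -> 2020-12-31 12:00
forecast_time = datetime(2021, 1, 1, 0, 0)

step = forecast_time - analysis_time
step_hours = step.days * 24 + step.seconds // 3600              # -> 12
print(analysis_time, step_hours)
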
コード例 #27
0
ファイル: nc_pps_l2.py プロジェクト: junjie2008v/mpop
class PPSReader(Reader):
    """Reader class for PPS files"""
    pformat = "nc_pps_l2"

    def __init__(self, *args, **kwargs):
        Reader.__init__(self, *args, **kwargs)
        # Source of the data, 'local' or 'ears'
        self._source = None
        # Parser for getting info from the file names
        self._parser = None
        # Satellite config
        self._config = None
        # Location of geolocation files, required for 'local' products
        self._cloud_product_geodir = None
        # Name of the product having geolocation for 'local' products
        self._geolocation_product_name = None

    def _read_config(self, sat_name, instrument_name):
        '''Read config for the satellite'''

        if self._config:
            return

        self._config = ConfigParser()
        configfile = os.path.join(CONFIG_PATH, sat_name + ".cfg")
        LOG.debug("Read configfile %s", configfile)
        self._config.read(configfile)

        try:
            self._cloud_product_geodir = \
                self._config.get(instrument_name + "-level3",
                                 "cloud_product_geodir",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            pass

        LOG.debug("cloud_product_geodir = %s", self._cloud_product_geodir)

        try:
            self._geolocation_product_name = \
                self._config.get(instrument_name + "-level3",
                                 "geolocation_product_name",
                                 raw=True,
                                 vars=os.environ)
        except NoOptionError:
            if self._source != 'ears':
                LOG.warning(
                    "No geolocation product name given in config, "
                    "using default: %s", GEO_PRODUCT_NAME_DEFAULT)
                self._geolocation_product_name = GEO_PRODUCT_NAME_DEFAULT

    def _determine_prod_and_geo_files(self, prodfilenames):
        """From the list of product files and the products to load determine the
        product files and the geolocation files that will be considered when
        reading the data

        """

        # geofiles4product is a dict listing all geo-locations files applicable
        # for each product.
        # prodfiles4product is a dict listing all product files for a given
        # product name

        prodfiles4product = {}
        geofiles4product = {}
        if prodfilenames:
            if not isinstance(prodfilenames, (list, set, tuple)):
                prodfilenames = [prodfilenames]
            for fname in prodfilenames:
                # Only standard NWCSAF/PPS and EARS-NWC naming accepted!
                # No support for old file names (< PPSv2014)
                if (os.path.basename(fname).startswith("S_NWC") or
                        os.path.basename(fname).startswith("W_XX-EUMETSAT")):
                    if not self._parser:
                        if os.path.basename(fname).startswith("S_NWC"):
                            self._source = 'local'
                            self._parser = Parser(LOCAL_PPS_FILE_MASK)
                        else:
                            self._source = 'ears'
                            self._parser = Parser(EARS_PPS_FILE_MASK)
                else:
                    LOG.info("Unrecognized NWCSAF/PPS file: %s", fname)
                    continue

                parse_info = self._parser.parse(os.path.basename(fname))
                prodname = parse_info['product']

                if prodname not in prodfiles4product:
                    prodfiles4product[prodname] = []

                prodfiles4product[prodname].append(fname)

            # Assemble geolocation information
            if self._source == 'ears':
                # For EARS data, the files have geolocation in themselves
                for prodname, fnames in prodfiles4product.items():
                    geofiles4product[prodname] = fnames
            else:
                # For locally processed data, use the geolocation from
                # the product defined in config

                if self._geolocation_product_name in prodfiles4product:
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = \
                            prodfiles4product[self._geolocation_product_name]
                else:
                    # If the product files with geolocation are not used,
                    # assume that they are still available on the disk.

                    if self._cloud_product_geodir is None:
                        LOG.warning(
                            "Config option 'cloud_product_geodir' is not "
                            "available! Assuming same directory as "
                            "products.")
                    for prodname in prodfiles4product.keys():
                        geofiles4product[prodname] = []
                        for fname in prodfiles4product[prodname]:
                            directory = self._cloud_product_geodir or \
                                os.path.abspath(fname)
                            parse_info = \
                                self._parser.parse(os.path.basename(fname))
                            fname = fname.replace(
                                parse_info['product'],
                                self._geolocation_product_name)
                            fname = os.path.join(directory, fname)
                            geofiles4product[prodname].append(fname)

            # Check that each product file has a corresponding geolocation
            # file:
            '''
            if self._geolocation_product_name:
                for prod in products:
                    if prod not in geofiles4product:
                        LOG.error("No product name %s in dict "
                                  "geofiles4product!",
                                  prod)
                        continue
                    if prod not in prodfiles4product:
                        LOG.error("No product name %s in dict "
                                  "prodfiles4product!",
                                  prod)
                        continue
                    if len(geofiles4product[prod]) != \
                       len(prodfiles4product[prod]):
                        LOG.error("Mismatch in number of product files and "
                                  "matching geolocation files!")
            '''

        return prodfiles4product, geofiles4product

    def load(self, satscene, **kwargs):
        """Read data from file and load it into *satscene*.
        """

        prodfilenames = kwargs.get('filename')
        time_interval = kwargs.get('time_interval')
        if prodfilenames and time_interval:
            LOG.warning("You have specified both a list of files " +
                        "and a time interval")
            LOG.warning("Specifying a time interval will only take effect " +
                        "if no files are specified")
            time_interval = None

        products = satscene.channels_to_load & set(PPS_PRODUCTS)
        if len(products) == 0:
            LOG.debug("No PPS cloud products to load, abort")
            return

        self._read_config(satscene.fullname, satscene.instrument_name)

        LOG.info("Products to load: %s", str(products))

        # If a list of files is provided to the load call, we disregard the
        # directory and filename specifications/definitions in the config file.

        if not prodfilenames:
            try:
                area_name = satscene.area_id or satscene.area.area_id
            except AttributeError:
                area_name = "satproj_?????_?????"

            # Make the list of files for the requested products:
            if isinstance(time_interval, (tuple, set, list)) and \
               len(time_interval) == 2:
                time_start, time_end = time_interval
            else:
                time_start, time_end = satscene.time_slot, None

            LOG.debug("Start and end times: %s %s", str(time_start),
                      str(time_end))
            prodfilenames = get_filenames(satscene, products, self._config,
                                          (time_start, time_end), area_name)

        LOG.debug("Product files: %s", str(prodfilenames))

        retv = self._determine_prod_and_geo_files(prodfilenames)
        prodfiles4product, geofiles4product = retv

        # Reading the products
        classes = {
            "CTTH": CloudTopTemperatureHeight,
            "CT": CloudType,
            "CMA": CloudMask,
            "PC": PrecipitationClouds,
            "CPP": CloudPhysicalProperties
        }
        nodata_mask = False

        read_external_geo = {}
        for product in products:
            LOG.debug("Loading %s", product)

            if product not in prodfiles4product:
                LOG.warning("No files found for product: %s", product)
                continue

            pps_band = PPSProductData(prodfiles4product[product]).read()
            chn = classes[product]()
            chn.read(pps_band)

            if chn.name not in satscene:
                LOG.info("Adding new channel %s", chn.name)
                satscene.channels.append(chn)

            # Check if geolocation is loaded:
            if not chn.area:
                read_external_geo[product] = satscene.channels[-1].name

        # Check if some 'channel'/product needs geolocation. If some
        # product does not have geolocation, get it from the
        # geofilename:
        from pyresample import geometry

        # Load geolocation
        for chn_name in read_external_geo.values():
            LOG.debug("ch_name = %s", str(chn_name))
            chn = satscene[chn_name]
            geofilenames = geofiles4product[chn_name]
            LOG.debug("Geo-files = %s", str(geofilenames))
            geoloc = PpsGeolocationData(chn.shape, chn.granule_lengths,
                                        geofilenames).read()

            try:
                satscene[chn.name].area = geometry.SwathDefinition(
                    lons=geoloc.longitudes, lats=geoloc.latitudes)

                area_name = ("swath_" + satscene.fullname + "_" +
                             str(satscene.time_slot) + "_" + str(chn.shape) +
                             "_" + chn.name)
                satscene[chn.name].area.area_id = area_name
                satscene[chn.name].area_id = area_name
            except ValueError:
                LOG.exception(
                    'Failed making a SwathDefinition: ' +
                    'min,max lons,lats = (%f,%f) (%f,%f)',
                    geoloc.longitudes.data.min(), geoloc.longitudes.data.max(),
                    geoloc.latitudes.data.min(), geoloc.latitudes.data.max())
                LOG.warning("No geolocation loaded for %s", str(chn_name))

        # PpsGeolocationData.clear_cache()

        return
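
A small sketch of the product-to-geolocation file-name substitution done near the end of _determine_prod_and_geo_files. The PPS file mask used here is an assumption, not the real LOCAL_PPS_FILE_MASK, and "CMA" merely stands in for whatever GEO_PRODUCT_NAME_DEFAULT resolves to.

import os
from trollsift import Parser

# Assumed, simplified stand-in for LOCAL_PPS_FILE_MASK (not the real mask).
pps_mask = "S_NWC_{product:s}_{platform_id:s}_{orbit:5d}_{start_time:%Y%m%dT%H%M%S}Z.nc"
parser = Parser(pps_mask)

prod_file = "S_NWC_CT_npp_12345_20150819T124700Z.nc"
info = parser.parse(prod_file)

# Swap the product token for the geolocation product to get the matching geo file.
geo_file = prod_file.replace(info["product"], "CMA")
geo_path = os.path.join("/data/pps/geo", geo_file)
print(geo_path)  # /data/pps/geo/S_NWC_CMA_npp_12345_20150819T124700Z.nc
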
コード例 #28
0
def get_filenames(filepattern):
    """Yield (filename, parsed metadata) for every file matching *filepattern*."""
    parser = Parser(filepattern)
    for filename in glob.iglob(parser.globify()):
        yield filename, parser.parse(filename)
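
A possible way to use the generator above, assuming `glob` and trollsift's `Parser` are imported as in the snippet. The pattern and directory are invented; only Parser.globify and Parser.parse are relied on.

# Hypothetical pattern: globify() turns the braces into wildcards
# (something like "/data/in/*_*.nc") before iglob scans the disk.
pattern = "/data/in/{platform_name:s}_{start_time:%Y%m%d_%H%M}.nc"

for path, meta in get_filenames(pattern):
    print(path, meta["platform_name"], meta["start_time"])
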