def setup(decoder):
    """Set up the granule triggers."""
    granule_triggers = []

    for section in CONFIG.sections():
        regions = [get_area_def(region)
                   for region in CONFIG.get(section, "regions").split()]

        timeliness = timedelta(minutes=CONFIG.getint(section, "timeliness"))
        try:
            duration = timedelta(seconds=CONFIG.getfloat(section, "duration"))
        except NoOptionError:
            duration = None

        collectors = [region_collector.RegionCollector(region, timeliness, duration)
                      for region in regions]

        try:
            observer_class = CONFIG.get(section, "watcher")
            pattern = CONFIG.get(section, "pattern")
            parser = Parser(pattern)
            glob = parser.globify()
        except NoOptionError:
            observer_class = None

        try:
            publish_topic = CONFIG.get(section, "publish_topic")
        except NoOptionError:
            publish_topic = None

        if observer_class in ["PollingObserver", "Observer"]:
            LOGGER.debug("Using %s for %s", observer_class, section)
            granule_trigger = trigger.WatchDogTrigger(collectors, terminator,
                                                      decoder, [glob],
                                                      observer_class,
                                                      publish_topic=publish_topic)
        else:
            LOGGER.debug("Using posttroll for %s", section)
            granule_trigger = trigger.PostTrollTrigger(
                collectors, terminator,
                CONFIG.get(section, 'service').split(','),
                CONFIG.get(section, 'topics').split(','),
                publish_topic=publish_topic)

        granule_triggers.append(granule_trigger)

    return granule_triggers
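# A hedged sketch of the kind of INI section setup() above could consume; the
# section name and all values are hypothetical, but the option names are
# exactly the ones the code reads via CONFIG.get()/getint()/getfloat():
# "regions", "timeliness", "duration", "watcher", "pattern", "publish_topic",
# and, for the posttroll branch, "service" and "topics".
#
# [local_granules]
# regions = euro4 afghanistan
# timeliness = 10
# duration = 85.4
# watcher = PollingObserver
# pattern = /local_disk/data/granule_{start_time:%Y%m%d%H%M%S}.h5
# publish_topic = /collection/granules
# # Without "watcher"/"pattern", the PostTrollTrigger branch is used instead:
# # service = viirs_service
# # topics = /new/granule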
def _compose_filenames(self, time_slot):
    """Compose the filename sets."""
    # Get a copy of the metadata
    meta = self.slots[time_slot]['metadata'].copy()

    # Replace variable tags (such as processing time) with
    # wildcards, as these can't be forecasted.
    try:
        ignored_keys = self._config["config"]["variable_tags"].split(',')
        meta = _copy_without_ignore_items(meta, ignored_keys=ignored_keys)
    except KeyError:
        pass

    critical_files, wanted_files, all_files = [], [], []
    files = self._config["files"]
    for fle in files:
        pattern = fle["pattern"]
        parser = Parser(pattern)
        for seg in fle["segments"]:
            chans = seg.get('channel_name', [''])
            critical_segments = seg.get('critical_segments', [''])
            wanted_segments = seg.get('wanted_segments', [''])
            all_segments = seg.get('all_segments', [''])
            for chan in chans:
                meta['channel_name'] = chan
                for seg2 in critical_segments:
                    meta['segment'] = seg2
                    critical_files.append(parser.globify(meta))
                for seg2 in wanted_segments:
                    meta['segment'] = seg2
                    wanted_files.append(parser.globify(meta))
                for seg2 in all_segments:
                    meta['segment'] = seg2
                    all_files.append(parser.globify(meta))

    self.slots[time_slot]['critical_files'] = set(critical_files)
    self.slots[time_slot]['wanted_files'] = set(wanted_files)
    self.slots[time_slot]['all_files'] = set(all_files)
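# To make the globbing step above concrete: trollsift's Parser substitutes the
# metadata it is given and turns every tag left unfilled into a wildcard, so
# each composed name is a glob mask matching one expected segment file. The
# pattern and metadata here are hypothetical.
from trollsift import Parser

parser = Parser("{channel_name}-{segment}-{start_time:%Y%m%d%H%M}.hdf")
meta = {"channel_name": "VIS006", "segment": "000001"}
print(parser.globify(meta))  # something like "VIS006-000001-*.hdf"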
def _get_watchdog_trigger(self, collectors):
    observer_class = self._config_items["watcher"]
    if observer_class not in ["PollingObserver", "Observer"]:
        raise ValueError("Unknown watcher type: %s" % observer_class)

    pattern = self._config_items["pattern"]
    parser = Parser(pattern)
    glob = parser.globify()
    publish_topic = self._get_publish_topic()

    logger.debug("Using %s for %s", observer_class, self.section)
    return WatchDogTrigger(collectors, self._config_items, [glob],
                           observer_class, self.publisher,
                           publish_topic=publish_topic)
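# A hedged sketch of the _config_items mapping the method above reads. The
# "watcher" and "pattern" keys come straight from the code, "publish_topic"
# is presumably what _get_publish_topic() looks up, and all values here are
# invented. Any watcher other than "PollingObserver" or "Observer" makes the
# method raise ValueError.
config_items = {
    "watcher": "PollingObserver",
    "pattern": "{channel_name}-{segment}-{start_time:%Y%m%d%H%M}.hdf",
    "publish_topic": "/collection/granules",
}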
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()

        try:
            nameservers = config.get(section, 'nameserver')
            nameservers = nameservers.split()
        except (NoOptionError, ValueError):
            nameservers = []

        try:
            addresses = config.get(section, 'addresses')
            addresses = addresses.split()
        except (NoOptionError, ValueError):
            addresses = None

        try:
            publish_port = config.get(section, 'publish_port')
        except NoOptionError:
            publish_port = 0

        try:
            services = config.get(section, 'services').split()
        except (NoOptionError, ValueError):
            services = ""

        self._listener = ListenerContainer(topics=topics,
                                           addresses=addresses,
                                           services=services)
        self._publisher = publisher.NoisyPublisher("segment_gatherer",
                                                   port=publish_port,
                                                   nameservers=nameservers)
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)
        try:
            self._time_tolerance = config.getint(section, "time_tolerance")
        except NoOptionError:
            self._time_tolerance = 30
        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()
        self.time_name = config.get(section, 'time_name')
        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False
        self._providing_server = None
        if config.has_option(section, 'providing_server'):
            self._providing_server = config.get(section, 'providing_server')

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, mda):
        """Initialize the wanted, all and critical file sets."""
        # Init metadata struct
        metadata = mda.copy()
        metadata['dataset'] = []

        time_slot = str(metadata[self.time_name])
        self.logger.debug("Adding new slot: %s", time_slot)
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, an empty set() is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical for production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(
                time_slot, self._config.get(self._section, "wanted_files"))
        # Names of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(
                time_slot, self._config.get(self._section, "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename sets based on a pattern and an item string.

        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'
        """
        # Empty set
        result = set()
        # Get a copy of the metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            meta = _copy_without_ignore_items(
                meta, ignored_keys=self._config.get(self._section,
                                                    'variable_tags').split(','))
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                format_string = '%d'
                if len(segments[0]) > 1 and segments[0][0] == '0':
                    format_string = '%0' + str(len(segments[0])) + 'd'
                segments = [format_string % i
                            for i in range(int(segments[0]),
                                           int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.globify(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish the file dataset and reinitialize the gatherer."""
        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        if missing_files_check:
            # ... and missing files
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        # Remove the tags that are not necessary for datasets
        for tag in REMOVE_TAGS:
            try:
                del data['metadata'][tag]
            except KeyError:
                pass

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set the logger."""
        self.logger = logger

    def update_timeout(self, slot):
        """Update the slot timeout."""
        slot['timeout'] = dt.datetime.utcnow() + self._timeliness
        time_slot = str(slot['metadata'][self.time_name])
        self.logger.info("Setting timeout to %s for slot %s.",
                         str(slot['timeout']), time_slot)

    def slot_ready(self, slot):
        """Determine if a slot is ready to be published."""
        # If no files have been collected, the slot is not ready
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])

        wanted_and_critical_files = slot['wanted_files'].union(
            slot['critical_files'])
        num_wanted_and_critical_files_received = len(
            wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, the slot is ready
        if wanted_and_critical_files.issubset(slot['received_files']):
            self.logger.info("All files received for slot %s.", time_slot)
            return SLOT_READY

        if slot['timeout'] is None:
            self.update_timeout(slot)

        if slot['timeout'] < dt.datetime.utcnow():
            if slot['critical_files'].issubset(slot['received_files']):
                # All critical files have been received
                # Timeout reached, collection ready
                self.logger.info("Timeout occurred, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
            else:
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occurred and required files "
                                    "were not present, data discarded for "
                                    "slot %s.", time_slot)
                return SLOT_OBSOLETE_TIMEOUT

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run the SegmentGatherer."""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                if status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check the listener for new messages
            msg = None
            try:
                msg = self._listener.output_queue.get(True, 1)
            except AttributeError:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                if (self._providing_server and
                        self._providing_server != msg.host):
                    continue
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop the gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process a message."""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return

        metadata = {}

        # Use the values parsed from the filename as a basis
        for key in mda:
            if key not in DO_NOT_COPY_KEYS:
                metadata[key] = mda[key]

        # Update with the data given in the message
        for key in msg.data:
            if key not in DO_NOT_COPY_KEYS:
                metadata[key] = msg.data[key]

        time_slot = self._find_time_slot(metadata[self.time_name])

        # Init metadata etc. if this is the first file
        if time_slot not in self.slots:
            self._init_data(metadata)
            slot = self.slots[time_slot]
            to_add = []
            for filename in slot['all_files']:
                if filename == msg.data['uid']:
                    continue
                url = urlparse(msg.data['uri'])
                path = os.path.join(os.path.dirname(url.path), filename)
                if not os.path.exists(path):
                    continue
                new_url = list(url)
                new_url[2] = path
                uri = urlunparse(new_url)
                slot['metadata']['dataset'].append({'uri': uri,
                                                    'uid': filename})
                to_add.append(filename)
            slot['received_files'].update(to_add)
            if to_add:
                self.logger.debug("Some files were already received %s",
                                  str(to_add))
                self.update_timeout(slot)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            mda = _copy_without_ignore_items(
                mda, ignored_keys=self._config.get(self._section,
                                                   'variable_tags').split(','))
        except NoOptionError:
            pass

        mask = self._parser.globify(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # Collect all the sensors, not only the latest
        if type(msg.data["sensor"]) not in (tuple, list, set):
            msg.data["sensor"] = [msg.data["sensor"]]
        for sensor in msg.data["sensor"]:
            if "sensor" not in slot["metadata"]:
                slot["metadata"]["sensor"] = []
            if sensor not in slot["metadata"]["sensor"]:
                slot["metadata"]["sensor"].append(sensor)

        # If critical files have been received but the slot is
        # not complete, add the file to the list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)

    def _find_time_slot(self, time_obj):
        """Find a time slot and return it as a string.

        If no slots are close enough, return *str(time_obj)*.
        """
        for slot in self.slots:
            time_slot = self.slots[slot]['metadata'][self.time_name]
            time_diff = time_obj - time_slot
            if abs(time_diff.total_seconds()) < self._time_tolerance:
                self.logger.debug("Found an existing time slot, using that")
                return str(time_slot)

        return str(time_obj)
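# A standalone sketch of the segment-range expansion used by
# _compose_filenames() above: 'VIS006:1-8' expands to segments 1..8, and a
# leading zero in the range start ('01-08') switches to zero-padded names.
# The function name expand_segments is hypothetical.
def expand_segments(spec):
    channel_name, segments = spec.split(':')
    segments = segments.split('-')
    if len(segments) > 1:
        format_string = '%d'
        if len(segments[0]) > 1 and segments[0][0] == '0':
            format_string = '%0' + str(len(segments[0])) + 'd'
        segments = [format_string % i
                    for i in range(int(segments[0]), int(segments[-1]) + 1)]
    return channel_name, segments

print(expand_segments('VIS006:01-08'))
# ('VIS006', ['01', '02', '03', '04', '05', '06', '07', '08'])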
def get_filenames(filepattern):
    """Yield the existing files matching *filepattern* together with the
    metadata parsed from each filename."""
    parser = Parser(filepattern)
    for filename in glob.iglob(parser.globify()):
        yield filename, parser.parse(filename)
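# Hedged usage of get_filenames() above (the pattern is hypothetical): each
# iteration yields an existing file matching the globified pattern, together
# with the metadata trollsift parses back out of its name.
for fname, meta in get_filenames("/data/{channel_name}-{segment}-{start_time:%Y%m%d%H%M}.hdf"):
    print(fname, meta["channel_name"], meta["start_time"])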
            duration = None

        collectors = [region_collector.RegionCollector(region, timeliness, duration)
                      for region in regions]

        try:
            pattern = config.get(section, "pattern")
            try:
                observer_class = config.get(section, "watcher")
            except NoOptionError:
                observer_class = None
            logger.debug("Using watchdog for %s", section)
            parser = Parser(pattern)
            granule_trigger = trigger.WatchDogTrigger(collectors, terminator,
                                                      decoder,
                                                      [parser.globify()],
                                                      observer_class)
        except NoOptionError:
            logger.debug("Using posttroll for %s", section)
            granule_trigger = trigger.PostTrollTrigger(
                collectors, terminator,
                config.get(section, 'service').split(','),
                config.get(section, 'topics').split(','))

        granule_triggers.append(granule_trigger)

    pub.start()

    for granule_trigger in granule_triggers:
        granule_trigger.start()
    try:
        while True:
class SegmentGatherer(object):
    """Gatherer for geostationary satellite segments and multifile polar
    satellite granules."""

    def __init__(self, config, section):
        self._config = config
        self._section = section
        topics = config.get(section, 'topics').split()
        self._listener = ListenerContainer(topics=topics)
        self._publisher = publisher.NoisyPublisher("segment_gatherer")
        self._subject = config.get(section, "publish_topic")
        self._pattern = config.get(section, 'pattern')
        self._parser = Parser(self._pattern)

        try:
            self._timeliness = dt.timedelta(seconds=config.getint(section,
                                                                  "timeliness"))
        except (NoOptionError, ValueError):
            self._timeliness = dt.timedelta(seconds=1200)

        try:
            self._num_files_premature_publish = \
                config.getint(section, "num_files_premature_publish")
        except (NoOptionError, ValueError):
            self._num_files_premature_publish = -1

        self.slots = OrderedDict()
        self.time_name = config.get(section, 'time_name')
        self.logger = logging.getLogger("segment_gatherer")
        self._loop = False

    def _clear_data(self, time_slot):
        """Clear data."""
        if time_slot in self.slots:
            del self.slots[time_slot]

    def _init_data(self, msg, mda):
        """Initialize the wanted, all and critical file sets."""
        # Init metadata struct
        metadata = {}
        for key in msg.data:
            if key not in ("uid", "uri", "channel_name", "segment"):
                metadata[key] = msg.data[key]
        metadata['dataset'] = []

        # Use also the metadata parsed from the filenames
        metadata.update(mda)

        time_slot = str(metadata[self.time_name])
        self.slots[time_slot] = {}
        self.slots[time_slot]['metadata'] = metadata.copy()

        # Critical files that are required, otherwise production will fail.
        # If there are no critical files, an empty set() is used.
        try:
            critical_segments = self._config.get(self._section,
                                                 "critical_files")
            self.slots[time_slot]['critical_files'] = \
                self._compose_filenames(time_slot, critical_segments)
        except (NoOptionError, ValueError):
            self.slots[time_slot]['critical_files'] = set([])

        # These files are wanted, but not critical for production
        self.slots[time_slot]['wanted_files'] = \
            self._compose_filenames(
                time_slot, self._config.get(self._section, "wanted_files"))
        # Names of all the files
        self.slots[time_slot]['all_files'] = \
            self._compose_filenames(
                time_slot, self._config.get(self._section, "all_files"))

        self.slots[time_slot]['received_files'] = set([])
        self.slots[time_slot]['delayed_files'] = dict()
        self.slots[time_slot]['missing_files'] = set([])
        self.slots[time_slot]['timeout'] = None
        self.slots[time_slot]['files_till_premature_publish'] = \
            self._num_files_premature_publish

    def _compose_filenames(self, time_slot, itm_str):
        """Compose filename sets based on a pattern and an item string.

        itm_str is formatted like ':PRO,:EPI' or 'VIS006:8,VIS008:1-8,...'
        """
        # Empty set
        result = set()
        # Get a copy of the metadata
        meta = self.slots[time_slot]['metadata'].copy()

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            meta = _copy_without_ignore_items(
                meta, ignored_keys=self._config.get(self._section,
                                                    'variable_tags').split(','))
        except NoOptionError:
            pass

        for itm in itm_str.split(','):
            channel_name, segments = itm.split(':')
            segments = segments.split('-')
            if len(segments) > 1:
                segments = ['%d' % i for i in range(int(segments[0]),
                                                    int(segments[-1]) + 1)]
            meta['channel_name'] = channel_name
            for seg in segments:
                meta['segment'] = seg
                fname = self._parser.globify(meta)
                result.add(fname)

        return result

    def _publish(self, time_slot, missing_files_check=True):
        """Publish the file dataset and reinitialize the gatherer."""
        data = self.slots[time_slot]

        # Diagnostic logging about delayed ...
        delayed_files = data['delayed_files']
        if len(delayed_files) > 0:
            file_str = ''
            for key in delayed_files:
                file_str += "%s %f seconds, " % (key, delayed_files[key])
            self.logger.warning("Files received late: %s",
                                file_str.strip(', '))

        if missing_files_check:
            # ... and missing files
            missing_files = data['all_files'].difference(
                data['received_files'])
            if len(missing_files) > 0:
                self.logger.warning("Missing files: %s",
                                    ', '.join(missing_files))

        msg = message.Message(self._subject, "dataset", data['metadata'])
        self.logger.info("Sending: %s", str(msg))
        self._publisher.send(str(msg))

        # self._clear_data(time_slot)

    def set_logger(self, logger):
        """Set the logger."""
        self.logger = logger

    def slot_ready(self, slot):
        """Determine if a slot is ready to be published."""
        # If no files have been collected, the slot is not ready
        if len(slot['received_files']) == 0:
            return SLOT_NOT_READY

        time_slot = str(slot['metadata'][self.time_name])
        wanted_and_critical_files = \
            slot['wanted_files'].union(slot['critical_files'])
        num_wanted_and_critical_files_received = \
            len(wanted_and_critical_files & slot['received_files'])

        self.logger.debug("Got %s wanted or critical files in slot %s.",
                          num_wanted_and_critical_files_received,
                          time_slot)

        if num_wanted_and_critical_files_received \
                == slot['files_till_premature_publish']:
            slot['files_till_premature_publish'] = -1
            return SLOT_READY_BUT_WAIT_FOR_MORE

        # If all wanted files have been received, the slot is ready
        if wanted_and_critical_files.issubset(slot['received_files']):
            self.logger.info("All files received for slot %s.", time_slot)
            return SLOT_READY

        if slot['critical_files'].issubset(slot['received_files']):
            # All critical files have been received
            if slot['timeout'] is None:
                # Set the timeout
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection ready
                self.logger.info("Timeout occurred, required files received "
                                 "for slot %s.", time_slot)
                return SLOT_READY
            else:
                pass
        else:
            if slot['timeout'] is None:
                slot['timeout'] = dt.datetime.utcnow() + self._timeliness
                self.logger.info("Setting timeout to %s for slot %s.",
                                 str(slot['timeout']), time_slot)
                return SLOT_NOT_READY
            elif slot['timeout'] < dt.datetime.utcnow():
                # Timeout reached, collection is obsolete
                self.logger.warning("Timeout occurred and required files "
                                    "were not present, data discarded for "
                                    "slot %s.", time_slot)
                return SLOT_OBSOLETE_TIMEOUT
            else:
                pass

        # Timeout not reached, wait for more files
        return SLOT_NOT_READY

    def run(self):
        """Run the SegmentGatherer."""
        self._publisher.start()
        self._loop = True
        while self._loop:
            # Check if there are slots ready for publication
            slots = self.slots.copy()
            for slot in slots:
                slot = str(slot)
                status = self.slot_ready(slots[slot])
                if status == SLOT_READY:
                    # Collection ready, publish and remove
                    self._publish(slot)
                    self._clear_data(slot)
                if status == SLOT_READY_BUT_WAIT_FOR_MORE:
                    # Collection ready, publish but wait for more
                    self._publish(slot, missing_files_check=False)
                elif status == SLOT_OBSOLETE_TIMEOUT:
                    # Collection unfinished and obsolete, discard
                    self._clear_data(slot)
                else:
                    # Collection unfinished, wait for more data
                    pass

            # Check the listener for new messages
            msg = None
            try:
                msg = self._listener.queue.get(True, 1)
            except KeyboardInterrupt:
                self.stop()
                continue
            except Queue.Empty:
                continue

            if msg.type == "file":
                self.logger.info("New message received: %s", str(msg))
                self.process(msg)

    def stop(self):
        """Stop the gatherer."""
        self.logger.info("Stopping gatherer.")
        self._loop = False
        if self._listener is not None:
            self._listener.stop()
        if self._publisher is not None:
            self._publisher.stop()

    def process(self, msg):
        """Process a message."""
        try:
            mda = self._parser.parse(msg.data["uid"])
        except ValueError:
            self.logger.debug("Unknown file, skipping.")
            return

        time_slot = str(mda[self.time_name])

        # Init metadata etc. if this is the first file
        if time_slot not in self.slots:
            self._init_data(msg, mda)

        slot = self.slots[time_slot]

        # Replace variable tags (such as processing time) with
        # wildcards, as these can't be forecasted.
        try:
            mda = _copy_without_ignore_items(
                mda, ignored_keys=self._config.get(self._section,
                                                   'variable_tags').split(','))
        except NoOptionError:
            pass

        mask = self._parser.globify(mda)

        if mask in slot['received_files']:
            return

        # Add uid and uri
        slot['metadata']['dataset'].append({'uri': msg.data['uri'],
                                            'uid': msg.data['uid']})

        # If critical files have been received but the slot is
        # not complete, add the file to the list of delayed files
        if len(slot['critical_files']) > 0 and \
           slot['critical_files'].issubset(slot['received_files']):
            delay = dt.datetime.utcnow() - (slot['timeout'] - self._timeliness)
            slot['delayed_files'][msg.data['uid']] = delay.total_seconds()

        # Add to received files
        slot['received_files'].add(mask)
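# A minimal, hypothetical driver for the SegmentGatherer above; the file name
# "gatherer.ini" and the section name "msg-hrit" are invented. RawConfigParser
# matches the Python 2 era NoOptionError/Queue.Empty usage in the class.
from ConfigParser import RawConfigParser

config = RawConfigParser()
config.read("gatherer.ini")
gatherer = SegmentGatherer(config, "msg-hrit")
gatherer.run()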