Example #1
    def fetch(self, minion_id, sync_type):
        log.debug("SyncObjects.fetch: %s/%s" % (minion_id, sync_type))
        if minion_id is None:
            # We're probably being replayed to from the database
            log.warn("SyncObjects.fetch called with minion_id=None")
            return

        self._fetching_at[sync_type] = now()
        client = LocalClient(config.get('cthulhu', 'salt_config_path'))
        # TODO clean up unused 'since' argument
        pub_data = client.run_job(
            minion_id, 'ceph.get_cluster_object',
            condition_kwarg(
                [], {
                    'cluster_name': self._cluster_name,
                    'sync_type': sync_type.str,
                    'since': None
                }))
        if not pub_data:
            log.error("Failed to start fetch job %s/%s" %
                      (minion_id, sync_type))
            # Don't throw an exception because if a fetch fails we should end up
            # issuing another on next heartbeat
        else:
            log.debug("SyncObjects.fetch: jid=%s minions=%s" %
                      (pub_data['jid'], pub_data['minions']))
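A note on the snippet above: condition_kwarg is not defined in the example; it is the helper that old Salt releases shipped as salt.utils.condition_kwarg, which folds keyword arguments into the single argument list that LocalClient.run_job expects. A minimal sketch of that helper, assuming it matches the historical Salt implementation:

    # Sketch of old Salt's condition_kwarg (assumed to match the
    # historical salt.utils implementation; removed in modern Salt).
    def condition_kwarg(arg, kwarg):
        if isinstance(kwarg, dict):
            # Keyword arguments travel as a trailing dict tagged with
            # '__kwarg__' so the minion can tell them from positionals.
            kw_ = {'__kwarg__': True}
            kw_.update(kwarg)
            return list(arg) + [kw_]
        return arg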
Example #2
    def on_sync_object(self, fsid, sync_type, new, old):
        """
        Notification that a newer version of a SyncObject is available, or
        the first version of a SyncObject is available at startup (wherein
        old will be a null SyncObject)

        :param fsid: The FSID of the cluster to which the object belongs
        :param sync_type: A SyncObject subclass
        :param new: A SyncObject
        :param old: A SyncObject (same type as new)
        """
        log.debug("Eventer.on_sync_object: %s" % sync_type.str)

        if old.data is None:
            return

        if sync_type == OsdMap:
            self._on_pool_status(fsid, new, old)
            self._on_osd_map(fsid, new, old)
        elif sync_type == Health:
            self._on_health(fsid, new, old)
        elif sync_type == MonStatus:
            self._on_mon_status(fsid, new, old)
        elif sync_type == QuorumStatus:
            self._on_quorum_status(fsid, new, old)

        self._flush()
Example #3
    def on_fetch_complete(self, minion_id, sync_type, version, data):
        """
        :return: A SyncObject if this version was new to us, else None
        """
        log.debug("SyncObjects.on_fetch_complete %s/%s/%s" % (minion_id, sync_type.str, version))
        self._fetching_at[sync_type] = None

        # A fetch might give us a newer version than we knew we had asked for
        if sync_type.cmp(version, self._known_versions[sync_type]) > 0:
            self._known_versions[sync_type] = version

        # Don't store this if we already got something newer
        if sync_type.cmp(version, self.get_version(sync_type)) <= 0:
            log.warn("Ignoring outdated update %s/%s from %s" % (sync_type.str, version, minion_id))
            new_object = None
        else:
            log.info("Got new version %s/%s" % (sync_type.str, version))
            new_object = self.set_map(sync_type, version, data)

        # This might not be the latest: if it's not, send out another fetch
        # right away
        if sync_type.cmp(self._known_versions[sync_type], version) > 0:
            self.fetch(minion_id, sync_type)

        return new_object
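The sync_type.cmp calls above assume each SyncObject subclass knows how to order two versions of itself. A hedged sketch of that contract (illustrative, not Calamari's verbatim classes): epoch-versioned maps compare numerically, while hash-versioned data can only report "changed or not":

    # Assumed shape of the version comparison used by on_fetch_complete.
    class VersionedSyncObject(object):
        @classmethod
        def cmp(cls, a, b):
            # Monotonic integer epochs (e.g. the OSD map) order naturally.
            return cmp(a, b)  # Python 2 builtin, matching the era of this code

    class UnversionedSyncObject(object):
        @classmethod
        def cmp(cls, a, b):
            # Hash-style versions (e.g. health) are unordered: report 1
            # for any difference, 0 for equality.
            return 0 if a == b else 1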
Example #4
    def _run(self):
        log.info("%s running" % self.__class__.__name__)

        event = SaltEventSource(log, salt_config)
        while not self._complete.is_set():
            # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
            ev = event.get_event(full=True)
            if ev is not None and 'tag' in ev:
                tag = ev['tag']
                data = ev['data']
                try:
                    if tag.startswith("ceph/cluster/"):
                        cluster_data = data['data']
                        if cluster_data['fsid'] not in self._manager.clusters:
                            self._manager.on_discovery(data['id'],
                                                       cluster_data)
                        else:
                            log.debug(
                                "%s: heartbeat from existing cluster %s" %
                                (self.__class__.__name__,
                                 cluster_data['fsid']))
                    elif re.match(r"^salt/job/\d+/ret/[^/]+$", tag):
                        if data['fun'] == 'saltutil.running':
                            self._manager.requests.on_tick_response(
                                data['id'], data['return'])
                        else:
                            self._manager.requests.on_completion(data)
                    else:
                        # This does not concern us, ignore it
                        log.debug("TopLevelEvents: ignoring %s" % tag)
                except:
                    log.exception("Exception handling message tag=%s" % tag)

        log.info("%s complete" % self.__class__.__name__)
Example #5
    def _run(self):
        log.info("%s running" % self.__class__.__name__)

        event = SaltEventSource(salt_config)
        while not self._complete.is_set():
            # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
            ev = event.get_event(full=True)
            if ev is not None:
                tag = ev['tag']
                data = ev['data']
                try:
                    if tag.startswith("ceph/cluster/"):
                        cluster_data = data['data']
                        if cluster_data['fsid'] not in self._manager.clusters:
                            self._manager.on_discovery(data['id'],
                                                       cluster_data)
                        else:
                            log.debug(
                                "%s: heartbeat from existing cluster %s" %
                                (self.__class__.__name__,
                                 cluster_data['fsid']))
                    else:
                        # This does not concern us, ignore it
                        pass
                except:
                    log.debug("Message content: %s" % data)
                    log.exception("Exception handling message")

        log.info("%s complete" % self.__class__.__name__)
Example #6
    def _run(self):
        log.info("%s running" % self.__class__.__name__)

        event = SaltEventSource(salt_config)
        while not self._complete.is_set():
            # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
            ev = event.get_event(full=True)
            if ev is not None:
                tag = ev['tag']
                data = ev['data']
                try:
                    if tag.startswith("ceph/cluster/"):
                        cluster_data = data['data']
                        if cluster_data['fsid'] not in self._manager.clusters:
                            self._manager.on_discovery(data['id'], cluster_data)
                        else:
                            log.debug("%s: heartbeat from existing cluster %s" % (
                                self.__class__.__name__, cluster_data['fsid']))
                    else:
                        # This does not concern us, ignore it
                        pass
                except:
                    log.debug("Message content: %s" % data)
                    log.exception("Exception handling message")

        log.info("%s complete" % self.__class__.__name__)
Example #7
    def on_sync_object(self, fsid, sync_type, new, old):
        """
        Notification that a newer version of a SyncObject is available, or
        the first version of a SyncObject is available at startup (wherein
        old will be a null SyncObject)

        :param fsid: The FSID of the cluster to which the object belongs
        :param sync_type: A SyncObject subclass
        :param new: A SyncObject
        :param old: A SyncObject (same type as new)
        """
        log.debug("Eventer.on_sync_object: %s" % sync_type.str)

        if old.data is None:
            return

        if sync_type == OsdMap:
            self._on_pool_status(fsid, new, old)
            self._on_osd_map(fsid, new, old)
        elif sync_type == Health:
            self._on_health(fsid, new, old)
        elif sync_type == MonStatus:
            self._on_mon_status(fsid, new, old)
        elif sync_type == QuorumStatus:
            self._on_quorum_status(fsid, new, old)

        self._flush()
Example #8
    def _run(self):
        log.info("%s running" % self.__class__.__name__)

        event = SaltEventSource(log, salt_config)
        while not self._complete.is_set():
            # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
            ev = event.get_event(full=True)
            if ev is not None and 'tag' in ev:
                tag = ev['tag']
                data = ev['data']
                try:
                    if tag.startswith("ceph/cluster/"):
                        cluster_data = data['data']
                        if cluster_data['fsid'] not in self._manager.clusters:
                            self._manager.on_discovery(data['id'], cluster_data)
                        else:
                            log.debug("%s: heartbeat from existing cluster %s" % (
                                self.__class__.__name__, cluster_data['fsid']))
                    elif re.match(r"^salt/job/\d+/ret/[^/]+$", tag):
                        if data['fun'] == 'saltutil.running':
                            self._manager.requests.on_tick_response(data['id'], data['return'])
                        else:
                            self._manager.requests.on_completion(data)
                    else:
                        # This does not concern us, ignore it
                        log.debug("TopLevelEvents: ignoring %s" % tag)
                except:
                    log.exception("Exception handling message tag=%s" % tag)

        log.info("%s complete" % self.__class__.__name__)
Example #9
    def _is_favorite(self, minion_id):
        """
        Check if this minion is the one which we are currently treating
        as the primary source of updates, and promote it to be the
        favourite if the favourite has not sent a heartbeat within
        cthulhu->favorite_timeout_s.

        :return: True if this minion was the favorite or has just been
                 promoted.
        """
        t_now = now()
        self._last_heartbeat[minion_id] = t_now

        if self._favorite_mon is None:
            log.debug("%s is my new favourite" % minion_id)
            self._set_favorite(minion_id)
            return True
        elif minion_id != self._favorite_mon:
            # Consider whether this minion should become my new favourite: has it been
            # too long since my current favourite reported in?
            time_since = t_now - self._last_heartbeat[self._favorite_mon]
            favorite_timeout_s = self._servers.get_contact_period(self._favorite_mon) * FAVORITE_TIMEOUT_FACTOR
            if time_since > datetime.timedelta(seconds=favorite_timeout_s):
                log.debug("My old favourite, %s, has not sent a heartbeat for %s: %s is my new favourite" % (
                    self._favorite_mon, time_since, minion_id
                ))
                self._set_favorite(minion_id)

        return minion_id == self._favorite_mon
Example #10
    def on_version(self, reported_by, sync_type, new_version):
        """
        Notify me that a particular version of a particular map exists.

        I may choose to initiate RPC to retrieve the map
        """
        log.debug("SyncObjects.on_version %s/%s/%s" % (reported_by, sync_type.str, new_version))
        old_version = self.get_version(sync_type)
        if sync_type.cmp(new_version, old_version) > 0:
            known_version = self._known_versions[sync_type]
            if sync_type.cmp(new_version, known_version) > 0:
                # We are out of date: request an up to date copy
                log.info("Advanced known version %s/%s %s->%s" % (
                    self._cluster_name, sync_type.str, known_version, new_version))
                self._known_versions[sync_type] = new_version
            else:
                log.info("on_version: %s is newer than %s" % (new_version, old_version))

            # If we already have a request out for this type of map, then consider
            # cancelling it if we've already waited for a while.
            if self._fetching_at[sync_type] is not None:
                if now() - self._fetching_at[sync_type] < self.FETCH_TIMEOUT:
                    log.info("Fetch already underway for %s" % sync_type.str)
                    return
                else:
                    log.warn("Abandoning fetch for %s started at %s" % (
                        sync_type.str, self._fetching_at[sync_type]))

            log.info("on_version: fetching %s/%s from %s, currently got %s, know %s" % (
                sync_type, new_version, reported_by, old_version, known_version
            ))
            self.fetch(reported_by, sync_type)
Example #11
    def create(self, attributes):
        commands = [('osd pool create', {'pool': attributes['name'], 'pg_num': attributes['pg_num']})]

        # Calculate appropriate min_size, including default if none given
        req_size = attributes.get('size', 0)
        req_min_size = attributes.get('min_size', 0)
        attributes['min_size'] = self._pool_min_size(req_size, req_min_size)

        # Which attributes must we set after the initial create?
        post_create_attrs = attributes.copy()
        del post_create_attrs['name']
        del post_create_attrs['pg_num']
        if 'pgp_num' in post_create_attrs:
            del post_create_attrs['pgp_num']

        commands.extend(self._pool_attribute_commands(
            attributes['name'],
            post_create_attrs
        ))

        log.debug("Post-create attributes: %s" % post_create_attrs)
        log.debug("Commands: %s" % commands)

        return PoolCreatingRequest(
            "Creating pool '{name}'".format(name=attributes['name']),
            self._cluster_monitor.fsid, self._cluster_monitor.name,
            attributes['name'], commands)
Example #12
    def _emit_stats(self):
        try:
            if not self._socket:
                log.info("Opening carbon socket {0}:{1}".format(
                    self.CARBON_HOST, self.CARBON_PORT))
                self._socket = socket.socket(socket.AF_INET,
                                             socket.SOCK_STREAM)
                self._socket.connect((self.CARBON_HOST, self.CARBON_PORT))

            carbon_data = ""
            t = int(time.time())
            usage = resource.getrusage(resource.RUSAGE_SELF)
            for usage_field in ("utime", "stime", "maxrss", "ixrss", "idrss",
                                "isrss", "minflt", "majflt", "nswap",
                                "inblock", "oublock", "msgsnd", "msgrcv",
                                "nsignals", "nvcsw", "nivcsw"):
                val = getattr(usage, "ru_{0}".format(usage_field))
                log.debug("{0}: {1}".format(usage_field, val))
                carbon_data += "calamari.cthulhu.ru_{0} {1} {2}\n".format(
                    usage_field, val, t)

            self._socket.sendall(carbon_data)
        except (socket.gaierror, resource.error):
            log.exception("Failed to send debugging statistics")
            self._close()
Example #13
 def wrap(*args, **kwargs):
     log.debug("RpcInterface >> %s(%s, %s)" % (item, args, kwargs))
     try:
         rc = attr(*args, **kwargs)
         log.debug("RpcInterface << %s" % item)
     except:
         log.exception("RpcInterface !! %s" % item)
         raise
     return rc
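In this example item and attr are free variables: wrap is the closure built by a logging proxy's __getattr__. A minimal sketch of the enclosing class, assuming the usual shape of such a proxy (the example shows only the inner function):

    # Assumed enclosing proxy; only wrap() itself appears in the example.
    class RpcInterface(object):
        def __init__(self, real):
            self._real = real

        def __getattr__(self, item):
            attr = getattr(self._real, item)
            if not callable(attr):
                return attr

            def wrap(*args, **kwargs):
                log.debug("RpcInterface >> %s(%s, %s)" % (item, args, kwargs))
                try:
                    rc = attr(*args, **kwargs)
                    log.debug("RpcInterface << %s" % item)
                except:
                    log.exception("RpcInterface !! %s" % item)
                    raise
                return rc

            return wrap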
Example #14
 def wrap(*args, **kwargs):
     log.debug("RpcInterface >> %s(%s, %s)" % (item, args, kwargs))
     try:
         rc = attr(*args, **kwargs)
         log.debug("RpcInterface << %s" % item)
     except:
         log.exception("RpcInterface !! %s" % item)
         raise
     return rc
Example #15
    def _run(self):
        self._emit(INFO, "Calamari server started")
        self._flush()

        self._complete.wait(GRACE_PERIOD)
        while not self._complete.is_set():
            self.on_tick()
            self._complete.wait(TICK_SECONDS)
        log.debug("Eventer complete")
Example #16
    def _run(self):
        self._emit(INFO, "Calamari server started")
        self._flush()

        self._complete.wait(GRACE_PERIOD)
        while not self._complete.is_set():
            self.on_tick()
            self._complete.wait(TICK_SECONDS)
        log.debug("Eventer complete")
Example #17
    def _run(self):
        log.debug("Eventer running")
        self._emit(INFO, "Calamari server started")
        self._emit_to_salt_bus(SEVERITIES[INFO], "Calamari server started", "ceph/calamari/started")
        self._flush()

        self._complete.wait(GRACE_PERIOD)
        while not self._complete.is_set():
            self.on_tick()
            self._complete.wait(TICK_SECONDS)
        log.debug("Eventer complete")
Example #18
    def _run(self):
        log.debug("Eventer running")
        self._emit(INFO, "Calamari server started")
        self._emit_to_salt_bus(SEVERITIES[INFO], "Calamari server started",
                               "ceph/calamari/started")
        self._flush()

        self._complete.wait(GRACE_PERIOD)
        while not self._complete.is_set():
            self.on_tick()
            self._complete.wait(TICK_SECONDS)
        log.debug("Eventer complete")
Example #19
    def on_tick(self):
        """
        Periodically call this to drive non-event-driven events (i.e. things
        which are based on walltime checks)
        """
        log.debug("Eventer.on_tick")

        now_utc = now()

        for fqdn, server_state in self._manager.servers.servers.items():
            if not server_state.managed:
                # We don't expect messages from unmanaged servers so don't
                # worry about whether they sent us one recently.
                continue

            if len(server_state.clusters) == 1:
                # Because Events can only be associated with one FSID, we only make this
                # association for servers with exactly one cluster.  This is a bit cheeky and
                # kind of an unnecessary limitation in the Event DB schema.
                fsid = server_state.clusters[0]
            else:
                fsid = None

            contact_threshold = CONTACT_THRESHOLD_FACTOR * self._manager.servers.get_contact_period(fqdn)
            if now_utc - server_state.last_contact > datetime.timedelta(seconds=contact_threshold):
                if fqdn not in self._servers_complained:
                    self._emit(WARNING, "Server {fqdn} is late reporting in, last report at {last}".format(
                        fqdn=fqdn, last=server_state.last_contact
                    ), fqdn=fqdn, fsid=fsid)
                    self._servers_complained.add(fqdn)
            else:
                if fqdn in self._servers_complained:
                    self._emit(RECOVERY, "Server {fqdn} regained contact".format(fqdn=fqdn),
                               fqdn=fqdn, fsid=fsid)
                    self._servers_complained.discard(fqdn)

        for fsid, cluster_monitor in self._manager.clusters.items():
            if cluster_monitor.update_time is None or now_utc - cluster_monitor.update_time > datetime.timedelta(
                    seconds=CLUSTER_CONTACT_THRESHOLD):
                if fsid not in self._clusters_complained:
                    self._clusters_complained.add(fsid)
                    self._emit(WARNING, "Cluster '{name}' is late reporting in".format(name=cluster_monitor.name),
                               fsid=fsid)
            else:
                if fsid in self._clusters_complained:
                    self._emit(RECOVERY, "Cluster '{name}' regained contact".format(name=cluster_monitor.name),
                               fsid=fsid)
                    self._clusters_complained.discard(fsid)

        self._flush()
Example #20
    def _run(self):
        self._plugin_monitor.start()

        self._ready.set()
        log.debug("ClusterMonitor._run: ready")

        remote.listen(self._complete,
                      on_heartbeat=self.on_heartbeat,
                      fsid=self.fsid,
                      on_job=self.on_job_complete)

        log.info("%s complete" % self.__class__.__name__)
        self._plugin_monitor.stop()
        self._plugin_monitor.join()
        self.done.set()
Example #21
    def _run(self):
        self._plugin_monitor.start()

        self._ready.set()
        log.debug("ClusterMonitor._run: ready")

        remote.listen(self._complete,
                      on_heartbeat=self.on_heartbeat,
                      fsid=self.fsid,
                      on_job=self.on_job_complete)

        log.info("%s complete" % self.__class__.__name__)
        self._plugin_monitor.stop()
        self._plugin_monitor.join()
        self.done.set()
Example #22
    def tick(self):
        """
        For walltime-based monitoring of running requests.  Long-running requests
        get a periodic call to saltutil.running to verify that things really
        are still happening.
        """

        if not self._by_jid:
            return
        else:
            log.debug("RequestCollection.tick: %s JIDs underway" %
                      len(self._by_jid))

        # Identify JIDs that haven't had a saltutil.running response for too long.
        # Kill requests in a separate phase because request:JID is not 1:1
        stale_jobs = set()
        _now = now()
        for request in self._by_jid.values():
            if _now - request.alive_at > datetime.timedelta(
                    seconds=TICK_PERIOD * 3):
                log.error("Request %s JID %s stale: now=%s, alive_at=%s" %
                          (request.id, request.jid, _now, request.alive_at))
                stale_jobs.add(request)

        # Any identified stale jobs are errored out.
        for request in stale_jobs:
            with self._update_index(request):
                request.set_error("Lost contact")
                request.jid = None
                request.complete()

        # Identify minions associated with JIDs in flight
        query_minions = set()
        for jid, request in self._by_jid.items():
            query_minions.add(request.minion_id)

        # Attempt to emit a saltutil.running to ping jobs; next tick we
        # will check whether alive_at was updated to indicate non-staleness
        if query_minions:
            log.info("RequestCollection.tick: sending saltutil.running to {0}".
                     format(query_minions))
            client = LocalClient(config.get('cthulhu', 'salt_config_path'))
            pub_data = client.run_job(list(query_minions),
                                      'saltutil.running', [],
                                      expr_form="list")
            if not pub_data:
                log.warning("Failed to publish saltutil.running to {0}".format(
                    query_minions))
Example #23
    def on_tick_response(self, minion_id, jobs):
        """
        Update the alive_at parameter of requests to record that they
        are still running remotely.

        :param jobs: The response from a saltutil.running
        """
        log.debug("RequestCollection.on_tick_response: %s from %s" % (len(jobs), minion_id))
        for job in jobs:
            try:
                request = self._by_jid[job['jid']]
            except KeyError:
                # Not one of mine, ignore it
                pass
            else:
                request.alive_at = now()
Example #24
    def on_tick_response(self, minion_id, jobs):
        """
        Update the alive_at parameter of requests to record that they
        are still running remotely.

        :param jobs: The response from a saltutil.running
        """
        log.debug("RequestCollection.on_tick_response: %s from %s" %
                  (len(jobs), minion_id))
        for job in jobs:
            try:
                request = self._by_jid[job['jid']]
            except KeyError:
                # Not one of mine, ignore it
                pass
            else:
                request.alive_at = now()
Example #25
    def on_job_complete(self, fqdn, jid, success, result, cmd, args):
        # It would be much nicer to put the FSID at the start of
        # the tag, if salt would only let us add custom tags to our jobs.
        # Instead we enforce a convention that calamari jobs include
        # fsid in their return value.
        if 'fsid' not in result or result['fsid'] != self.fsid:
            # Something for a different ClusterMonitor
            log.debug("Ignoring job return, not for my FSID")
            return

        if cmd == 'ceph.get_cluster_object':
            # A ceph.get_cluster_object response
            if not success:
                log.error("on_sync_object: failure from %s: %s" % (fqdn, result))
                return

            self.on_sync_object(fqdn, result)
        else:
            log.warning("Unexpected function '%s' (%s)" % (cmd, cmd))
Example #26
    def tick(self):
        """
        For walltime-based monitoring of running requests.  Long-running requests
        get a periodic call to saltutil.running to verify that things really
        are still happening.
        """

        if not self._by_jid:
            return
        else:
            log.debug("RequestCollection.tick: %s JIDs underway" % len(self._by_jid))

        # Identify JIDs that haven't had a saltutil.running response for too long.
        # Kill requests in a separate phase because request:JID is not 1:1
        stale_jobs = set()
        _now = now()
        for request in self._by_jid.values():
            if _now - request.alive_at > datetime.timedelta(seconds=TICK_PERIOD * 3):
                log.error("Request %s JID %s stale: now=%s, alive_at=%s" % (
                    request.id, request.jid, _now, request.alive_at
                ))
                stale_jobs.add(request)

        # Any identified stale jobs are errored out.
        for request in stale_jobs:
            with self._update_index(request):
                request.set_error("Lost contact")
                request.jid = None
                request.complete()

        # Identify minions associated with JIDs in flight
        query_minions = set()
        for jid, request in self._by_jid.items():
            query_minions.add(request.minion_id)

        # Attempt to emit a saltutil.running to ping jobs; next tick we
        # will check whether alive_at was updated to indicate non-staleness
        if query_minions:
            log.info("RequestCollection.tick: sending saltutil.running to {0}".format(query_minions))
            client = LocalClient(config.get('cthulhu', 'salt_config_path'))
            pub_data = client.run_job(list(query_minions), 'saltutil.running', [], expr_form="list")
            if not pub_data:
                log.warning("Failed to publish saltutil.running to {0}".format(query_minions))
Example #27
    def run_plugin(self, plugin_name, status_processor, period):
        # Slice off some time for the checks, leaving some for the status_processor
        check_timeout = int(period * .75)
        salt_name = '.'.join((plugin_name, 'status_check'))

        while not self._complete.is_set():
            start = int(time.time())
            timeout_at = start + period
            servers = [s.fqdn for s in self._servers.get_all()]
            check_data = self.filter_errors(self._remote_run_cmd_async(servers,
                                                                       salt_name,
                                                                       timeout=check_timeout),
                                            salt_name)

            self.plugin_results[plugin_name] = status_processor(check_data)
            log.debug("processed " + str(plugin_name) + str(check_data))

            time_left = timeout_at - int(time.time())
            gevent.sleep(max(0, time_left))
Example #28
    def run_plugin(self, plugin_name, status_processor, period):
        # Slice off some time for the checks, leaving some for the status_processor
        check_timeout = int(period * .75)
        salt_name = '.'.join((plugin_name, 'status_check'))

        while not self._complete.is_set():
            start = int(time.time())
            timeout_at = start + period
            servers = [s.fqdn for s in self._servers.get_all()]
            check_data = self.filter_errors(
                self._remote_run_cmd_async(servers,
                                           salt_name,
                                           timeout=check_timeout), salt_name)

            self.plugin_results[plugin_name] = status_processor(check_data)
            log.debug("processed " + str(plugin_name) + str(check_data))

            time_left = timeout_at - int(time.time())
            gevent.sleep(max(0, time_left))
Example #29
    def fetch(self, minion_id, sync_type):
        log.debug("SyncObjects.fetch: %s/%s" % (minion_id, sync_type))
        if minion_id is None:
            # We're probably being replayed to from the database
            log.warn("SyncObjects.fetch called with minion_id=None")
            return

        self._fetching_at[sync_type] = now()
        client = LocalClient(config.get('cthulhu', 'salt_config_path'))
        # TODO clean up unused 'since' argument
        pub_data = client.run_job(minion_id, 'ceph.get_cluster_object',
                                  condition_kwarg([], {'cluster_name': self._cluster_name,
                                                       'sync_type': sync_type.str,
                                                       'since': None}))
        if not pub_data:
            log.error("Failed to start fetch job %s/%s" % (minion_id, sync_type))
            # Don't throw an exception because if a fetch fails we should end up
            # issuing another on next heartbeat
        else:
            log.debug("SyncObjects.fetch: jid=%s minions=%s" % (pub_data['jid'], pub_data['minions']))
Example #30
    def __init__(self, manager):
        super(Eventer, self).__init__()
        self._manager = manager

        self._complete = gevent.event.Event()

        # Flags for things we have complained about being out of contact
        # with, to avoid generating the same events repeatedly
        self._servers_complained = set()
        self._clusters_complained = set()

        # Check the config to decide if events have to be pushed to the salt
        # event bus. If so, initialize the salt Caller object used to push events.
        if EMIT_EVENTS_TO_SALT_EVENT_BUS:
            log.debug("Events will be emitted to salt event bus")
            __opts__ = salt.config.minion_config(MINION_CONFIG)
            __opts__['file_client'] = 'local'
            self.caller = salt.client.Caller(mopts=__opts__)

        self._events = []
Example #31
    def fetch(self, minion_id, sync_type):
        log.debug("SyncObjects.fetch: %s/%s" % (minion_id, sync_type))
        if minion_id is None:
            # We're probably being replayed to from the database
            log.warn("SyncObjects.fetch called with minion_id=None")
            return

        self._fetching_at[sync_type] = now()
        try:
            # TODO clean up unused 'since' argument
            jid = remote.run_job(minion_id, 'ceph.get_cluster_object',
                                 {'cluster_name': self._cluster_name,
                                  'sync_type': sync_type.str,
                                  'since': None})
        except Unavailable:
            # Don't throw an exception because if a fetch fails we should end up
            # issuing another on next heartbeat
            log.error("Failed to start fetch job %s/%s" % (minion_id, sync_type))
        else:
            log.debug("SyncObjects.fetch: jid=%s" % jid)
Example #32
    def __init__(self, manager):
        super(Eventer, self).__init__()
        self._manager = manager

        self._complete = gevent.event.Event()

        # Flags for things we have complained about being out of contact
        # with, to avoid generating the same events repeatedly
        self._servers_complained = set()
        self._clusters_complained = set()

        # Check the config to decide if events have to be pushed to the salt
        # event bus. If so, initialize the salt Caller object used to push events.
        if EMIT_EVENTS_TO_SALT_EVENT_BUS:
            log.debug("Events will be emitted to salt event bus")
            __opts__ = salt.config.minion_config(MINION_CONFIG)
            __opts__['file_client'] = 'local'
            self.caller = salt.client.Caller(mopts=__opts__)

        self._events = []
Example #33
    def on_sync_object(self, minion_id, data):
        if minion_id != self._favorite_mon:
            log.debug("Ignoring map from %s, it is not my favourite (%s)" % (minion_id, self._favorite_mon))

        assert data['fsid'] == self.fsid

        sync_object = data['data']

        sync_type = SYNC_OBJECT_STR_TYPE[data['type']]
        new_object = self.inject_sync_object(minion_id, data['type'], data['version'], sync_object)
        if new_object:
            self._requests.on_map(self.fsid, sync_type, new_object)
            self._persister.update_sync_object(
                self.fsid,
                self.name,
                sync_type.str,
                new_object.version if isinstance(new_object.version, int) else None,
                now(), sync_object)
        else:
            log.warn("ClusterMonitor.on_sync_object: stale object received from %s" % minion_id)
Example #34
    def _emit_stats(self):
        try:
            if not self._socket:
                log.info("Opening carbon socket {0}:{1}".format(self.CARBON_HOST, self.CARBON_PORT))
                self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                self._socket.connect((self.CARBON_HOST, self.CARBON_PORT))

            carbon_data = ""
            t = int(time.time())
            usage = resource.getrusage(resource.RUSAGE_SELF)
            for usage_field in ("utime", "stime", "maxrss", "ixrss", "idrss", "isrss", "minflt", "majflt",
                                "nswap", "inblock", "oublock", "msgsnd", "msgrcv", "nsignals", "nvcsw", "nivcsw"):
                val = getattr(usage, "ru_{0}".format(usage_field))
                log.debug("{0}: {1}".format(usage_field, val))
                carbon_data += "calamari.cthulhu.ru_{0} {1} {2}\n".format(usage_field, val, t)

            self._socket.sendall(carbon_data)
        except (socket.gaierror, resource.error):
            log.exception("Failed to send debugging statistics")
            self._close()
Example #35
    def on_job_complete(self, fqdn, jid, success, result, cmd, args):
        # It would be much nicer to put the FSID at the start of
        # the tag, if salt would only let us add custom tags to our jobs.
        # Instead we enforce a convention that calamari jobs include
        # fsid in their return value.
        if 'fsid' not in result or result['fsid'] != self.fsid:
            # Something for a different ClusterMonitor
            log.debug("Ignoring job return, not for my FSID")
            return

        if cmd == 'ceph.get_cluster_object':
            # A ceph.get_cluster_object response
            if not success:
                log.error("on_sync_object: failure from %s: %s" %
                          (fqdn, result))
                return

            self.on_sync_object(fqdn, result)
        else:
            log.warning("Unexpected function '%s' (%s)" % (cmd, cmd))
Example #36
    def create(self, attributes):
        commands = [('osd pool create', {'pool': attributes['name'], 'pg_num': attributes['pg_num']})]

        # Which attributes must we set after the initial create?
        post_create_attrs = attributes.copy()
        del post_create_attrs['name']
        del post_create_attrs['pg_num']
        if 'pgp_num' in post_create_attrs:
            del post_create_attrs['pgp_num']

        commands.extend(self._pool_attribute_commands(
            attributes['name'],
            post_create_attrs
        ))

        log.debug("Post-create attributes: %s" % post_create_attrs)
        log.debug("Commands: %s" % post_create_attrs)

        return PoolCreatingRequest(
            "Creating pool '{name}'".format(name=attributes['name']),
            self._cluster_monitor.fsid, self._cluster_monitor.name,
            attributes['name'], commands)
Example #37
    def on_heartbeat(self, minion_id, cluster_data):
        """
        Handle a ceph.heartbeat from a minion.

        Heartbeats come from all servers, but we're mostly interested in those
        which come from a mon (and therefore have the 'clusters' attribute populated)
        as these tell us whether there are any new versions of cluster maps
        for us to fetch.
        """

        if not self._is_favorite(minion_id):
            log.debug('Ignoring cluster data from %s, it is not my favourite (%s)' % (minion_id, self._favorite_mon))
            return

        self.update_time = datetime.datetime.utcnow().replace(tzinfo=utc)

        log.debug('Checking for version increments in heartbeat from %s' % minion_id)
        for sync_type in SYNC_OBJECT_TYPES:
            self._sync_objects.on_version(
                minion_id,
                sync_type,
                cluster_data['versions'][sync_type.str])
Example #38
    def fetch(self, minion_id, sync_type):
        log.debug("SyncObjects.fetch: %s/%s" % (minion_id, sync_type))
        if minion_id is None:
            # We're probably being replayed to from the database
            log.warn("SyncObjects.fetch called with minion_id=None")
            return

        self._fetching_at[sync_type] = now()
        try:
            # TODO clean up unused 'since' argument
            jid = remote.run_job(
                minion_id, 'ceph.get_cluster_object', {
                    'cluster_name': self._cluster_name,
                    'sync_type': sync_type.str,
                    'since': None
                })
        except Unavailable:
            # Don't throw an exception because if a fetch fails we should end up
            # issuing another on next heartbeat
            log.error("Failed to start fetch job %s/%s" %
                      (minion_id, sync_type))
        else:
            log.debug("SyncObjects.fetch: jid=%s" % jid)
Example #39
    def inject_sync_object(self, minion_id, sync_type, version, data):
        sync_type = SYNC_OBJECT_STR_TYPE[sync_type]
        old_object = self._sync_objects.get(sync_type)
        new_object = self._sync_objects.on_fetch_complete(minion_id, sync_type, version, data)

        if new_object:
            # The ServerMonitor is interested in cluster maps, do this prior
            # to updating any derived objects so that derived generators have
            # access to latest view of server state
            if sync_type == OsdMap:
                self._servers.on_osd_map(data)
            elif sync_type == MonMap:
                self._servers.on_mon_map(data)
            elif sync_type == MdsMap:
                self._servers.on_mds_map(self.fsid, data)

            # The frontend would like us to maintain some derived objects that
            # munge together the PG and OSD maps into an easier-to-consume form.
            for generator in derived.generators:
                if sync_type in generator.depends:
                    dependency_data = {}
                    for t in generator.depends:
                        obj = self._sync_objects.get(t)
                        if obj is not None:
                            dependency_data[t] = obj.data
                        else:
                            dependency_data[t] = None

                    if None not in dependency_data.values():
                        log.debug("Updating %s" % generator.__name__)
                        derived_objects = generator.generate(self, self._servers, dependency_data)
                        self._derived_objects.update(derived_objects)

            self._eventer.on_sync_object(self.fsid, sync_type, new_object, old_object)

        return new_object
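The derived.generators loop above implies a small plugin contract: each generator declares the sync types it depends on and builds a dict of derived objects from their data. A hedged sketch of a conforming generator (names are illustrative, not from the Calamari source):

    # Illustrative generator: 'depends' lists SyncObject subclasses,
    # generate() receives {sync_type: data} and returns {name: object}.
    class OsdSummary(object):
        depends = [OsdMap]

        @classmethod
        def generate(cls, cluster_monitor, servers, dependency_data):
            osd_map = dependency_data[OsdMap]
            up = [o for o in osd_map['osds'] if o['up']]
            return {'osd_summary': {'total': len(osd_map['osds']),
                                    'up': len(up)}}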
Example #40
    def _emit_to_salt_bus(self, severity, message, tag, **tags):
        """
        This function emits events to salt event bus, if the config
        value "emit_events_to_salt_event_bus" is set to true.
        """
        log.debug("Eventer running _emit_salt")
        if not EMIT_EVENTS_TO_SALT_EVENT_BUS:
            return

        log.debug("Eventer running _emit_salt")
        res = {}
        res["message"] = message
        res["severity"] = severity
        res["tags"] = tags
        tag = EVENT_TAG_PREFIX + tag

        log.debug("Eventer._emit_to_salt_bus: Tag:%s | Data: %s" %
                  (str(tag), str(res)))

        self.caller.sminion.functions['event.send'](tag, res)
Example #41
    def _emit_to_salt_bus(self, severity, message, tag, **tags):
        """
        This function emits events to salt event bus, if the config
        value "emit_events_to_salt_event_bus" is set to true.
        """
        log.debug("Eventer running _emit_salt")
        if not EMIT_EVENTS_TO_SALT_EVENT_BUS:
            return

        log.debug("Eventer running _emit_salt")
        res = {}
        res["message"] = message
        res["severity"] = severity
        res["tags"] = tags
        tag = EVENT_TAG_PREFIX + tag

        log.debug("Eventer._emit_to_salt_bus: Tag:%s | Data: %s" % (str(tag), str(res)))

        self.caller.sminion.functions['event.send'](
            tag,
            res
        )
Example #42
 def on_heartbeat(self, fqdn, data):
     if data['fsid'] not in self._manager.clusters:
         self._manager.on_discovery(fqdn, data)
     else:
         log.debug("%s: heartbeat from existing cluster %s" % (
             self.__class__.__name__, data['fsid']))
Example #43
 def list_server_logs(self, fqdn):
     client = LocalClient(config.get('cthulhu', 'salt_config_path'))
     results = client.cmd(fqdn, "log_tail.list_logs", ["."])
     log.debug('list_server_log result !!! {results}'.format(
         results=str(results)))
     return results
Example #44
 def stop(self):
     log.debug("Eventer stopping")
     self._complete.set()
Example #45
    def _run(self):
        self._plugin_monitor.start()

        self._ready.set()
        log.debug("ClusterMonitor._run: ready")

        event = SaltEventSource(log, salt_config)

        while not self._complete.is_set():
            # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
            ev = event.get_event(full=True)

            if ev is not None:
                data = ev['data']
                tag = ev['tag']
                log.debug("_run.ev: %s/tag=%s" % (data['id'] if 'id' in data else None, tag))

                # I am interested in the following tags:
                # - salt/job/<jid>/ret/<minion id> where jid is one that I started
                #   (this includes ceph.rados_command and ceph.get_cluster_object)
                # - ceph/cluster/<fsid> where fsid is my fsid

                try:
                    if tag.startswith("ceph/cluster/{0}".format(self.fsid)):
                        # A ceph.heartbeat beacon
                        self.on_heartbeat(data['id'], data['data'])
                    elif re.match(r"^salt/job/\d+/ret/[^/]+$", tag):
                        if data['fun'] == "saltutil.running":
                            # Update on what jobs are running
                            # It would be nice to filter these down to those which really are for
                            # this cluster, but as long as N_clusters and N_jobs are reasonably small
                            # it's not an efficiency problem.
                            self._requests.on_tick_response(data['id'], data['return'])

                        # It would be much nicer to put the FSID at the start of
                        # the tag, if salt would only let us add custom tags to our jobs.
                        # Instead we enforce a convention that all calamari jobs must include
                        # fsid in their return value.
                        if (not isinstance(data, dict)) or not isinstance(data['return'], dict):
                            # Something not formatted for ClusterMonitor
                            log.warning("Ignoring event %s" % tag)
                            continue

                        if 'fsid' not in data['return'] or data['return']['fsid'] != self.fsid:
                            # Something for a different ClusterMonitor
                            log.debug("Ignoring job return, not for my FSID")
                            continue

                        if data['fun'] == 'ceph.get_cluster_object':
                            # A ceph.get_cluster_object response
                            if not data['success']:
                                log.error("on_sync_object: failure from %s: %s" % (data['id'], data['return']))
                                continue

                            self.on_sync_object(data['id'], data['return'])
                        else:
                            log.warning("Unexpected function '%s' (%s)" % (data['fun'], tag))
                    else:
                        # This does not concern us, ignore it
                        pass
                except:
                    # Because this is our main event handling loop, swallow exceptions
                    # instead of letting them end the world.
                    log.exception("Exception handling message with tag %s" % tag)
                    log.debug("Message content: %s" % data)

        log.info("%s complete" % self.__class__.__name__)
        self._plugin_monitor.stop()
        self._plugin_monitor.join()
        self.done.set()
Example #46
 def reset_event_sink(self):
     if EMIT_EVENTS_TO_SALT_EVENT_BUS:
         log.debug("resetting minion")
         __opts__ = salt.config.minion_config(MINION_CONFIG)
         __opts__['file_client'] = 'local'
         self.caller = salt.client.Caller(mopts=__opts__)
Example #47
    def _recover(self):
        if sqlalchemy is None:
            return

        session = Session()
        for server in session.query(Server).all():
            log.debug("Recovered server %s" % server.fqdn)
            assert server.boot_time is None or server.boot_time.tzinfo is not None  # expect timezone-aware DB backend
            self.servers.inject_server(
                ServerState(fqdn=server.fqdn,
                            hostname=server.hostname,
                            managed=server.managed,
                            last_contact=server.last_contact,
                            boot_time=server.boot_time,
                            ceph_version=server.ceph_version))

        for service in session.query(Service).all():
            if service.server:
                server = session.query(Server).get(service.server)
            else:
                server = None
            log.debug("Recovered service %s/%s/%s on %s" %
                      (service.fsid, service.service_type, service.service_id,
                       server.fqdn if server else None))
            self.servers.inject_service(
                ServiceState(fsid=service.fsid,
                             service_type=service.service_type,
                             service_id=service.service_id),
                server.fqdn if server else None)

        # I want the most recent version of every sync_object
        fsids = [(row[0], row[1]) for row in session.query(
            SyncObject.fsid, SyncObject.cluster_name).distinct(SyncObject.fsid)
                 ]
        for fsid, name in fsids:
            cluster_monitor = ClusterMonitor(fsid, name, self.notifier,
                                             self.persister, self.servers,
                                             self.eventer, self.requests)
            self.clusters[fsid] = cluster_monitor

            object_types = [
                row[0]
                for row in session.query(SyncObject.sync_type).filter_by(
                    fsid=fsid).distinct()
            ]
            for sync_type in object_types:
                latest_record = session.query(SyncObject).filter_by(
                    fsid=fsid,
                    sync_type=sync_type).order_by(SyncObject.version.desc(),
                                                  SyncObject.when.desc())[0]

                # FIXME: bit of a hack because persisted records only store their
                # 'version' if it's a real counter version; the underlying problem
                # is that some data (health, pg_brief) has no usable version counters.
                def md5(raw):
                    hasher = hashlib.md5()
                    hasher.update(raw)
                    return hasher.hexdigest()

                if latest_record.version:
                    version = latest_record.version
                else:
                    version = md5(latest_record.data)

                when = latest_record.when
                when = when.replace(tzinfo=tzutc())
                if cluster_monitor.update_time is None or when > cluster_monitor.update_time:
                    cluster_monitor.update_time = when

                cluster_monitor.inject_sync_object(
                    None, sync_type, version,
                    msgpack.unpackb(latest_record.data))

        for monitor in self.clusters.values():
            log.info("Recovery: Cluster %s with update time %s" %
                     (monitor.fsid, monitor.update_time))
            monitor.start()
Example #48
 def stop(self):
     log.debug("Eventer stopping")
     self._complete.set()
Example #49
    def _recover(self):
        if sqlalchemy is None:
            return

        session = Session()
        for server in session.query(Server).all():
            log.debug("Recovered server %s" % server.fqdn)
            assert server.boot_time is None or server.boot_time.tzinfo is not None  # expect timezone-aware DB backend
            self.servers.inject_server(ServerState(
                fqdn=server.fqdn,
                hostname=server.hostname,
                managed=server.managed,
                last_contact=server.last_contact,
                boot_time=server.boot_time,
                ceph_version=server.ceph_version
            ))

        for service in session.query(Service).all():
            if service.server:
                server = session.query(Server).get(service.server)
            else:
                server = None
            log.debug("Recovered service %s/%s/%s on %s" % (
                service.fsid, service.service_type, service.service_id, server.fqdn if server else None
            ))
            self.servers.inject_service(ServiceState(
                fsid=service.fsid,
                service_type=service.service_type,
                service_id=service.service_id
            ), server.fqdn if server else None)

        # I want the most recent version of every sync_object
        fsids = [(row[0], row[1]) for row in session.query(SyncObject.fsid, SyncObject.cluster_name).distinct(SyncObject.fsid)]
        for fsid, name in fsids:
            cluster_monitor = ClusterMonitor(fsid, name, self.persister, self.servers,
                                             self.eventer, self.requests)
            self.clusters[fsid] = cluster_monitor

            object_types = [row[0] for row in session.query(SyncObject.sync_type).filter_by(fsid=fsid).distinct()]
            for sync_type in object_types:
                latest_record = session.query(SyncObject).filter_by(
                    fsid=fsid, sync_type=sync_type).order_by(
                    SyncObject.version.desc(), SyncObject.when.desc())[0]

                # FIXME: bit of a hack because persisted records only store their
                # 'version' if it's a real counter version; the underlying problem
                # is that some data (health, pg_brief) has no usable version counters.
                def md5(raw):
                    hasher = hashlib.md5()
                    hasher.update(raw)
                    return hasher.hexdigest()

                if latest_record.version:
                    version = latest_record.version
                else:
                    version = md5(latest_record.data)

                when = latest_record.when
                when = when.replace(tzinfo=tzutc())
                if cluster_monitor.update_time is None or when > cluster_monitor.update_time:
                    cluster_monitor.update_time = when

                cluster_monitor.inject_sync_object(None, sync_type, version, msgpack.unpackb(latest_record.data))

        for monitor in self.clusters.values():
            log.info("Recovery: Cluster %s with update time %s" % (monitor.fsid, monitor.update_time))
            monitor.start()
Example #50
 def list_server_logs(self, fqdn):
     client = LocalClient(config.get('cthulhu', 'salt_config_path'))
     results = client.cmd(fqdn, "log_tail.list_logs", ["."])
     log.debug('list_server_log result !!! {results}'.format(results=str(results)))
     return results