コード例 #1
0
ファイル: agent_daemon.py プロジェクト: zaja1kun/iml-agent
def main():
    """
    Entry point for the agent daemon.

    Parses the command line, sets up logging and signal handlers, builds an
    AgentClient pointed at the manager's ``agent/message/`` endpoint and then
    blocks until the client reports that it has stopped.

    If the ``IML_MANAGER_URL`` environment variable is missing, an error is
    logged and the function returns without starting the client.  Any other
    exception raised while starting or running is caught here and logged with
    its traceback rather than propagated.
    """
    parser = argparse.ArgumentParser(
        description="Integrated Manager for Lustre software Agent")

    # The flag is accepted on the command line, but the parsed namespace is
    # not used by this function.
    parser.add_argument("--publish-zconf", action="store_true")
    parser.parse_args()

    signal.signal(signal.SIGHUP, signal.SIG_IGN)

    daemon_log_setup()
    console_log_setup()
    daemon_log.info("Starting")

    try:
        daemon_log.info("Entering main loop")
        try:
            url = urljoin(os.environ["IML_MANAGER_URL"], "agent/message/")
        except KeyError as e:
            daemon_log.error(
                "No configuration found (must be registered before running the agent service), "
                "details: %s" % e)
            return

        if config.profile_managed is False:
            # This is kind of terrible. The design of DevicePluginManager is
            # such that it can be called with either class methods or
            # instantiated and then called with instance methods. As such,
            # we can't pass in a list of excluded plugins to the instance
            # constructor. Well, we could, but it would only work some
            # of the time and that would be even more awful.
            import chroma_agent.plugin_manager
            chroma_agent.plugin_manager.EXCLUDED_PLUGINS += ['corosync']

        agent_client = AgentClient(url, ActionPluginManager(),
                                   DevicePluginManager(), ServerProperties(),
                                   Crypto(ENV_PATH))

        def teardown_callback(*args, **kwargs):
            # Signal handler: stop the client, wait for it to finish, then
            # run every registered teardown function in order.
            agent_client.stop()
            agent_client.join()
            for function in agent_daemon_teardown_functions:
                function()

        signal.signal(signal.SIGINT, teardown_callback)
        signal.signal(signal.SIGTERM, teardown_callback)
        signal.signal(signal.SIGUSR1, decrease_loglevel)
        signal.signal(signal.SIGUSR2, increase_loglevel)

        # Call any agent daemon startup methods that were registered.
        for function in agent_daemon_startup_functions:
            function()

        agent_client.start()
        # Waking-wait to pick up signals
        while not agent_client.stopped.is_set():
            agent_client.stopped.wait(timeout=10)

        agent_client.join()
    except Exception:
        # Last-resort boundary: record the full traceback instead of letting
        # the daemon die silently.
        backtrace = '\n'.join(traceback.format_exception(*(sys.exc_info())))
        daemon_log.error("Unhandled exception: %s" % backtrace)
コード例 #2
0
    def _process_zpool(self, pool, block_devices):
        """
        Record information about a single zpool.

        When the pool's ZfsDevice reports itself available, ``zpool list`` is
        run and its output is handed to ``_add_zfs_pool``.  Otherwise the
        previously saved state is looked up in the store (by id when the pool
        dict carries one, by name when it does not) and passed to
        ``_update_pool_or_datasets``.  A KeyError from the store lookup is
        logged and the pool is left unprocessed.

        :param pool: dict of pool info; 'pool' (name) is required, 'id' is optional
        :param block_devices: forwarded unchanged to the pool/dataset helpers
        :return: None
        """
        pool_name = pool['pool']

        with ZfsDevice(pool_name, True) as zfs_device:
            if zfs_device.available:
                listing = AgentShell.try_run(
                    ["zpool", "list", "-H", "-o", "name,size,guid", pool_name])
                self._add_zfs_pool(listing, block_devices)
                return

            # zpool probably imported elsewhere, attempt to read from store, this should return
            # previously seen zpool state either with or without datasets
            pool_id = pool.get('id')

            try:
                if pool_id is not None:
                    stored = read_from_store(pool_id)
                else:
                    stored = find_name_in_store(pool_name)
            except KeyError as e:
                daemon_log.error("ZfsPool unavailable and could not be retrieved from store: %s ("
                                 "pool info: %s)" % (e, pool))
                return

            # populate self._pools/datasets/zvols info from saved data read from store
            self._update_pool_or_datasets(block_devices,
                                          stored['pool'],
                                          stored['datasets'],
                                          stored['zvols'])
コード例 #3
0
 def run(self):
     """
     Execute ``self._run()``.

     If it raises an Exception, log the formatted traceback together with
     this object's class name and then call ``sys.exit(-1)``.
     """
     try:
         self._run()
     except Exception:
         exc_info = sys.exc_info()
         trace_lines = traceback.format_exception(*exc_info)
         backtrace = "\n".join(trace_lines)
         thread_name = self.__class__.__name__
         daemon_log.error("Unhandled error in thread %s: %s" %
                          (thread_name, backtrace))
         sys.exit(-1)
コード例 #4
0
def scan_packages():
    """
    Interrogate the packages available from configured repositories, and the installation
    status of those packages.

    Reads the repo sections from REPO_PATH, runs yum 'clean' for them and then
    a 'repoquery' per repo, recording every non-source package found under
    ``repo_packages[repo][name]['available']`` as a VersionInfo.

    Returns None when REPO_PATH does not exist or when a repoquery fails with
    RuntimeError.  A repoquery line that does not split into exactly five
    fields is logged to the console log and the ValueError is re-raised.
    """

    # Look up what repos are configured
    # =================================
    if not os.path.exists(REPO_PATH):
        return None

    cp = ConfigParser.SafeConfigParser()
    cp.read(REPO_PATH)
    repo_names = sorted(cp.sections())
    repo_packages = dict(
        (name, defaultdict(lambda: {'available': [], 'installed': []}))
        for name in repo_names)

    # Run yum 'clean' for all configured repos before querying them
    # (presumably to discard stale cached metadata; confirm against yum_util)
    # =====================================================================
    yum_util('clean', fromrepo=repo_names)

    # Informational lines that can appear in the repoquery output and must
    # not be parsed as package records.
    ignored_prefixes = ("Last metadata expiration check",
                        "Waiting for process with pid")

    # For all repos, query packages in alphabetical order
    # ===================================================
    for repo_name in repo_names:
        packages = repo_packages[repo_name]
        # Bound before the try so the ValueError handler can always report it,
        # and never reports a previous repo's output by mistake.
        stdout = None
        try:
            stdout = yum_util('repoquery', fromrepo=[repo_name])

            # Returning nothing means the package was not found at all and so we have no data to deliver back.
            if stdout:
                for line in stdout.strip().split("\n"):
                    line = line.strip()
                    if line.startswith(ignored_prefixes):
                        continue
                    epoch, name, version, release, arch = line.split()
                    if arch == "src":
                        continue
                    packages[name]['available'].append(
                        VersionInfo(epoch=epoch,
                                    version=version,
                                    release=release,
                                    arch=arch))
        except ValueError:
            console_log.error("bug HYD-2948. repoquery Output: %s" % (stdout))
            # Bare raise keeps the original traceback intact.
            raise
        except RuntimeError as e:
            # This is a network operation, so cope with it failing
            daemon_log.error(e)
            return None

    # NOTE(review): as written, the function ends here and implicitly returns
    # None; the 'installed' lists are never filled in this block. Confirm
    # whether an installed-package pass and a final return are missing.
コード例 #5
0
    def poll(self, plugin_name):
        """
        Drive one polling cycle for a single plugin.

        Without a session: send a SESSION_CREATE_REQUEST, unless an earlier
        request is still inside its backoff window, in which case do nothing.
        Once the window has passed, the backoff is doubled while it is below
        MAX_SESSION_BACKOFF.

        With a session: call its poll() and forward whatever it returns as
        DevicePluginMessage(s).  If poll() raises, the error is logged, the
        session is terminated and a new one is requested.
        """
        now = datetime.datetime.now()
        sessions = self._client.sessions

        try:
            session = sessions.get(plugin_name)
        except KeyError:
            # No session yet: request one, honouring the backoff period.
            requested_at = sessions._requested_at
            backoffs = sessions._backoffs

            if plugin_name in requested_at:
                next_request_at = (requested_at[plugin_name] +
                                   backoffs[plugin_name])
                if now < next_request_at:
                    # We're still in our backoff period, skip requesting a session
                    daemon_log.debug("Delaying session request until %s" %
                                     next_request_at)
                    return

                if backoffs[plugin_name] < MAX_SESSION_BACKOFF:
                    backoffs[plugin_name] *= 2

            daemon_log.debug("Requesting session for plugin %s" % plugin_name)
            requested_at[plugin_name] = now
            self.put(Message("SESSION_CREATE_REQUEST", plugin_name))
            return

        try:
            data = session.poll()
        except Exception:
            trace_lines = traceback.format_exception(*(sys.exc_info()))
            backtrace = "\n".join(trace_lines)
            daemon_log.error("Error in plugin %s: %s" %
                             (plugin_name, backtrace))
            sessions.terminate(plugin_name)
            self.put(Message("SESSION_CREATE_REQUEST", plugin_name))
            return

        if data is None:
            # The plugin had nothing to report on this cycle.
            return

        if isinstance(data, DevicePluginMessageCollection):
            # Each member is wrapped individually, inheriting the
            # collection's priority.
            for message in data:
                session.send_message(
                    DevicePluginMessage(message, priority=data.priority))
        elif isinstance(data, DevicePluginMessage):
            session.send_message(data)
        else:
            session.send_message(DevicePluginMessage(data))
コード例 #6
0
    def _handle_messages(self, messages):
        """
        Parse and dispatch a batch of messages received from the manager.

        Each raw message is parsed into a Message and handled according to
        its type:

        * SESSION_CREATE_RESPONSE: create the session for the plugin
        * SESSION_TERMINATE_ALL: terminate every session
        * SESSION_TERMINATE: terminate the named plugin's session
        * DATA: deliver the body to the matching session; a plugin that
          raises while receiving has its session terminated

        Any other type raises NotImplementedError, which, like every other
        Exception raised while dispatching, is logged with its traceback so
        that the remaining messages are still processed.  Errors raised by
        ``Message.parse`` itself are not caught here.

        :param messages: sized iterable of raw messages accepted by Message.parse
        """
        daemon_log.info("HttpReader: got %s messages" % (len(messages)))
        for message in messages:
            m = Message()
            m.parse(message)
            daemon_log.info("HttpReader: %s(%s, %s)" %
                            (m.type, m.plugin_name, m.session_id))

            try:
                if m.type == "SESSION_CREATE_RESPONSE":
                    self._client.sessions.create(m.plugin_name, m.session_id)
                elif m.type == "SESSION_TERMINATE_ALL":
                    self._client.sessions.terminate_all()
                elif m.type == "SESSION_TERMINATE":
                    self._client.sessions.terminate(m.plugin_name)
                elif m.type == "DATA":
                    try:
                        session = self._client.sessions.get(
                            m.plugin_name, m.session_id)
                    except KeyError:
                        daemon_log.warning(
                            "Received a message for unknown session %s/%s" %
                            (m.plugin_name, m.session_id))
                    else:
                        # We have successfully routed the message to the plugin instance
                        # for this session
                        try:
                            session.receive_message(m.body)
                        except Exception:
                            # Only ordinary errors are treated as plugin
                            # failures; SystemExit and KeyboardInterrupt are
                            # allowed to propagate.
                            daemon_log.error("%s/%s raised an exception: %s" %
                                             (m.plugin_name, m.session_id,
                                              traceback.format_exc()))
                            self._client.sessions.terminate(m.plugin_name)
                else:
                    raise NotImplementedError(m.type)
            except Exception:
                backtrace = "\n".join(
                    traceback.format_exception(*(sys.exc_info())))
                daemon_log.error("Plugin exception handling data message: %s" %
                                 backtrace)
コード例 #7
0
ファイル: zfs.py プロジェクト: jn0/intel-manager-for-lustre
    def full_scan(self, block_devices):
        """
        Scan every known zpool and record what can be learned about it.

        The active pools are listed first, followed by any pools from
        ``get_zpools(active=False)`` whose name is not already present.
        For each pool:

        * available: run ``zpool list`` and pass the output to ``_add_zfs_pool``
        * state 'UNAVAIL': load the saved state from the store by pool id and
          pass it to ``_update_pool_or_datasets``; a KeyError is logged and the
          pool is skipped
        * anything else: log an error with the reported pool info

        An OSError anywhere in the scan resets ``_zpools``, ``_datasets`` and
        ``_zvols`` to empty dicts.

        :param block_devices: forwarded unchanged to the pool/dataset helpers
        """
        try:
            zpools = list(get_zpools())
            active_pool_names = [entry['pool'] for entry in zpools]
            zpools += [candidate
                       for candidate in get_zpools(active=False)
                       if candidate['pool'] not in active_pool_names]

            for pool in zpools:
                with ZfsDevice(pool['pool'], True) as zfs_device:
                    if zfs_device.available:
                        listing = AgentShell.try_run([
                            "zpool", "list", "-H", "-o", "name,size,guid",
                            pool['pool']
                        ])
                        self._add_zfs_pool(listing, block_devices)
                        continue

                    if pool['state'] != 'UNAVAIL':
                        daemon_log.error(
                            "ZfsPool could not be accessed, reported info: %s"
                            % pool)
                        continue

                    # zpool probably imported elsewhere, attempt to read from store, this should return
                    # previously seen zpool state either with or without datasets
                    try:
                        stored = read_from_store(pool['id'])
                    except KeyError as e:
                        daemon_log.error(
                            "ZfsPool unavailable and could not be retrieved from store: %s ("
                            "pool: %s)" % (e, pool['pool']))
                        continue

                    # populate self._pools/datasets/zvols info from saved data read from store
                    self._update_pool_or_datasets(
                        block_devices, stored['pool'], stored['datasets'],
                        stored['zvols'])
        except OSError:  # OSError occurs when ZFS is not installed.
            self._zpools = {}
            self._datasets = {}
            self._zvols = {}
コード例 #8
0
    def request(self, method, **kwargs):
        """
        Issue an HTTP request to ``self.url`` and return the decoded JSON body.

        The client certificate and key from ``self._crypto`` are attached when
        a certificate file is set.  Remaining keyword arguments are passed
        straight through to ``requests.request``.

        :param method: HTTP method name handed to ``requests.request``
        :return: the parsed JSON response, or None if the body is not valid JSON
        :raises HttpError: on connection-level failures, on a non-OK status,
            and on unexpected errors when not logging in debug mode (in debug
            mode the unexpected error itself is re-raised)
        """
        certificate = self._crypto.certificate_file
        private_key = self._crypto.private_key_file
        if certificate:
            kwargs["cert"] = (certificate, private_key)

        try:
            response = requests.request(
                method,
                self.url,
                headers={"Content-Type": "application/json"},
                # FIXME: set verify to true if we have a CA bundle
                verify=False,
                **kwargs)
        except (
                socket.error,
                requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.SSLError,
        ) as e:
            daemon_log.error("Error connecting to %s: %s" % (self.url, e))
            raise HttpError()
        except Exception as e:
            # If debugging is enabled meaning we are in test for example then raise the error again and the app
            # will crash. If debugging not enabled then this is a user scenario and it is better that we attempt
            # to carry on. No data will be transferred and so badness cannot happen.
            daemon_log.error("requests returned an unexpected error %s" % e)

            if logging_in_debug_mode:
                raise

            raise HttpError()

        if response.ok:
            # A reply without a JSON body is reported as None rather than an error.
            try:
                return response.json()
            except ValueError:
                return None

        daemon_log.error("Bad status %s from %s to %s" %
                         (response.status_code, method, self.url))
        if response.status_code == 413:
            daemon_log.error("Oversized request: %s" %
                             json.dumps(kwargs, indent=2))
        raise HttpError()
コード例 #9
0
        console_log_setup()
        daemon_log.info("Starting in the background")
    else:
        context = None
        daemon_log_setup()
        daemon_log.addHandler(logging.StreamHandler())

        console_log_setup()

    try:
        daemon_log.info("Entering main loop")
        try:
            conf = config.get('settings', 'server')
        except (KeyError, TypeError) as e:
            daemon_log.error(
                "No configuration found (must be registered before running the agent service), "
                "details: %s" % e)
            return

        if config.profile_managed is False:
            # This is kind of terrible. The design of DevicePluginManager is
            # such that it can be called with either class methods or
            # instantiated and then called with instance methods. As such,
            # we can't pass in a list of excluded plugins to the instance
            # constructor. Well, we could, but it would only work some
            # of the time and that would be even more awful.
            import chroma_agent.plugin_manager
            chroma_agent.plugin_manager.EXCLUDED_PLUGINS += ['corosync']

        agent_client = AgentClient(conf['url'] + "message/",
                                   ActionPluginManager(),