def main():
    """Run the agent daemon until its client is stopped.

    Parses the command line, ignores SIGHUP, sets up daemon and console
    logging, then builds an ``AgentClient`` that talks to the URL derived
    from the ``IML_MANAGER_URL`` environment variable.  Signal handlers are
    installed for shutdown (SIGINT/SIGTERM) and log-level changes
    (SIGUSR1/SIGUSR2), the registered startup functions are called, and the
    function then blocks until the client reports that it has stopped.

    If ``IML_MANAGER_URL`` is not set an error is logged and the function
    returns early.  Any other unexpected exception is logged with its
    backtrace instead of being propagated.

    :return: None
    """
    parser = argparse.ArgumentParser(
        description="Integrated Manager for Lustre software Agent")
    parser.add_argument("--publish-zconf", action="store_true")
    # The parsed namespace is not used here; parsing still validates the
    # command line (and handles --help).
    parser.parse_args()

    signal.signal(signal.SIGHUP, signal.SIG_IGN)

    daemon_log_setup()
    console_log_setup()
    daemon_log.info("Starting")

    try:
        daemon_log.info("Entering main loop")
        try:
            url = urljoin(os.environ["IML_MANAGER_URL"], "agent/message/")
        except KeyError as e:
            daemon_log.error(
                "No configuration found (must be registered before running the agent service), "
                "details: %s" % e)
            return

        if config.profile_managed is False:
            # This is kind of terrible. The design of DevicePluginManager is
            # such that it can be called with either class methods or
            # instantiated and then called with instance methods. As such,
            # we can't pass in a list of excluded plugins to the instance
            # constructor. Well, we could, but it would only work some
            # of the time and that would be even more awful.
            import chroma_agent.plugin_manager
            chroma_agent.plugin_manager.EXCLUDED_PLUGINS += ['corosync']

        agent_client = AgentClient(url, ActionPluginManager(),
                                   DevicePluginManager(), ServerProperties(),
                                   Crypto(ENV_PATH))

        def teardown_callback(*args, **kwargs):
            # Signal handler: stop the client, wait for it to finish, then
            # run every registered teardown function.
            agent_client.stop()
            agent_client.join()
            for function in agent_daemon_teardown_functions:
                function()

        signal.signal(signal.SIGINT, teardown_callback)
        signal.signal(signal.SIGTERM, teardown_callback)
        signal.signal(signal.SIGUSR1, decrease_loglevel)
        signal.signal(signal.SIGUSR2, increase_loglevel)

        # Call any agent daemon startup methods that were registered.
        for function in agent_daemon_startup_functions:
            function()

        agent_client.start()
        # Waking-wait to pick up signals
        while not agent_client.stopped.is_set():
            agent_client.stopped.wait(timeout=10)

        agent_client.join()
    except Exception:
        # Top-level boundary: log the full backtrace rather than crash.
        backtrace = '\n'.join(traceback.format_exception(*(sys.exc_info())))
        daemon_log.error("Unhandled exception: %s" % backtrace)
def _process_zpool(self, pool, block_devices):
    """
    Record the state of one zpool: query it live when ZfsDevice reports it
    as available, otherwise fall back to the data previously saved in the
    store.

    :param pool: dict of pool info; 'pool' holds the pool name and the
                 optional 'id' entry is used as the store key
    :param block_devices: handed on unchanged to the helpers that record
                          the pool information
    :return: None
    """
    name = pool['pool']

    with ZfsDevice(name, True) as device:
        if device.available:
            listing = AgentShell.try_run(
                ["zpool", "list", "-H", "-o", "name,size,guid", pool['pool']])
            self._add_zfs_pool(listing, block_devices)
            return

        # zpool probably imported elsewhere, attempt to read from store, this should return
        # previously seen zpool state either with or without datasets
        store_id = pool.get('id', None)

        try:
            # Without an id the store has to be searched by pool name.
            if store_id is not None:
                saved = read_from_store(store_id)
            else:
                saved = find_name_in_store(name)
        except KeyError as e:
            daemon_log.error("ZfsPool unavailable and could not be retrieved from store: %s ("
                             "pool info: %s)" % (e, pool))
            return

        # populate self._pools/datasets/zvols info from saved data read from store
        self._update_pool_or_datasets(block_devices,
                                      saved['pool'],
                                      saved['datasets'],
                                      saved['zvols'])
def run(self):
    """
    Execute self._run(); if it raises, log the backtrace together with the
    class name and then call sys.exit(-1).
    """
    try:
        self._run()
    except Exception:
        exc_type, exc_value, exc_tb = sys.exc_info()
        trace_lines = traceback.format_exception(exc_type, exc_value, exc_tb)
        backtrace = "\n".join(trace_lines)
        owner = self.__class__.__name__
        daemon_log.error("Unhandled error in thread %s: %s" % (owner, backtrace))
        sys.exit(-1)
def scan_packages():
    """
    Interrogate the packages available from configured repositories, and the
    installation status of those packages.

    The repository names are the section names of the file at REPO_PATH.
    For each repository the output of ``yum_util('repoquery', ...)`` is
    parsed, one package per line, as
    ``epoch name version release arch`` and every non-source package is
    recorded under ``repo_packages[repo][name]['available']``.

    :return: None if REPO_PATH does not exist or a repository query raises
             RuntimeError.
    :raises ValueError: if a line of repoquery output does not have exactly
                        five whitespace-separated fields (the output is
                        logged first).
    """
    # Look up what repos are configured
    # =================================
    if not os.path.exists(REPO_PATH):
        return None

    cp = ConfigParser.SafeConfigParser()
    cp.read(REPO_PATH)
    repo_names = sorted(cp.sections())
    repo_packages = {
        name: defaultdict(lambda: {'available': [], 'installed': []})
        for name in repo_names
    }

    # For all repos, run the yum 'clean' operation before querying
    # (presumably so the queries below see fresh metadata -- TODO confirm)
    # =============================================================
    yum_util('clean', fromrepo=repo_names)

    # Informational lines that may be mixed into the repoquery output and
    # do not describe a package.
    ignored_prefixes = ("Last metadata expiration check",
                        "Waiting for process with pid")

    # For all repos, query packages in alphabetical order
    # ===================================================
    for repo_name in repo_names:
        packages = repo_packages[repo_name]
        # Bound up front so the ValueError handler can always log it, even
        # if yum_util itself is what raised.
        stdout = None
        try:
            stdout = yum_util('repoquery', fromrepo=[repo_name])

            # Returning nothing means the package was not found at all and so we have no data to deliver back.
            if stdout:
                for raw_line in stdout.strip().split("\n"):
                    line = raw_line.strip()

                    # Blank lines carry no package data; skipping them
                    # avoids a spurious unpacking error below.
                    if not line or line.startswith(ignored_prefixes):
                        continue

                    epoch, name, version, release, arch = line.split()

                    if arch == "src":
                        continue

                    packages[name]['available'].append(
                        VersionInfo(epoch=epoch,
                                    version=version,
                                    release=release,
                                    arch=arch))
        except ValueError:
            console_log.error("bug HYD-2948. repoquery Output: %s" % (stdout))
            # Bare raise keeps the original traceback intact.
            raise
        except RuntimeError as e:
            # This is a network operation, so cope with it failing
            daemon_log.error(e)
            return None

    # NOTE(review): as written, the function falls off the end here and so
    # returns None; repo_packages is never returned and the 'installed'
    # lists are never filled in -- confirm whether code is missing.
def poll(self, plugin_name):
    """
    Drive one polling cycle for a plugin.

    When the plugin has no session, request one from the manager (subject
    to the per-plugin backoff).  When it has a session, call the session's
    poll callback and send on whatever it returns.
    """
    now = datetime.datetime.now()

    try:
        session = self._client.sessions.get(plugin_name)
    except KeyError:
        # Request to open a session
        sessions = self._client.sessions

        if plugin_name in sessions._requested_at:
            next_request_at = (sessions._requested_at[plugin_name] +
                               sessions._backoffs[plugin_name])
            if now < next_request_at:
                # We're still in our backoff period, skip requesting a session
                daemon_log.debug("Delaying session request until %s" %
                                 next_request_at)
                return

            # Backoff period has elapsed: lengthen it for next time, up to
            # the ceiling.
            if sessions._backoffs[plugin_name] < MAX_SESSION_BACKOFF:
                sessions._backoffs[plugin_name] *= 2

        daemon_log.debug("Requesting session for plugin %s" % plugin_name)
        sessions._requested_at[plugin_name] = now
        self.put(Message("SESSION_CREATE_REQUEST", plugin_name))
        return

    try:
        data = session.poll()
    except Exception:
        # A failing plugin loses its session and a fresh one is requested.
        backtrace = "\n".join(
            traceback.format_exception(*(sys.exc_info())))
        daemon_log.error("Error in plugin %s: %s" % (plugin_name, backtrace))
        self._client.sessions.terminate(plugin_name)
        self.put(Message("SESSION_CREATE_REQUEST", plugin_name))
        return

    if data is None:
        return

    if isinstance(data, DevicePluginMessageCollection):
        # Each member is wrapped individually, inheriting the collection's
        # priority.
        for message in data:
            session.send_message(
                DevicePluginMessage(message, priority=data.priority))
        return

    if isinstance(data, DevicePluginMessage):
        session.send_message(data)
    else:
        session.send_message(DevicePluginMessage(data))
def _handle_messages(self, messages):
    """
    Parse and dispatch a batch of messages.

    Each entry is parsed into a ``Message`` and handled according to its
    type:

    * ``SESSION_CREATE_RESPONSE`` -- create the session for the plugin
    * ``SESSION_TERMINATE_ALL``   -- terminate every session
    * ``SESSION_TERMINATE``       -- terminate the plugin's session
    * ``DATA``                    -- pass the body to the matching session

    Any other type raises NotImplementedError, which -- like every other
    exception raised while handling a single message -- is logged and does
    not stop the remaining messages from being processed.

    :param messages: sized iterable of raw messages accepted by
                     ``Message.parse``
    :return: None
    """
    daemon_log.info("HttpReader: got %s messages" % (len(messages)))
    for message in messages:
        m = Message()
        m.parse(message)
        daemon_log.info("HttpReader: %s(%s, %s)" %
                        (m.type, m.plugin_name, m.session_id))

        try:
            if m.type == "SESSION_CREATE_RESPONSE":
                self._client.sessions.create(m.plugin_name, m.session_id)
            elif m.type == "SESSION_TERMINATE_ALL":
                self._client.sessions.terminate_all()
            elif m.type == "SESSION_TERMINATE":
                self._client.sessions.terminate(m.plugin_name)
            elif m.type == "DATA":
                try:
                    session = self._client.sessions.get(
                        m.plugin_name, m.session_id)
                except KeyError:
                    daemon_log.warning(
                        "Received a message for unknown session %s/%s" %
                        (m.plugin_name, m.session_id))
                else:
                    # We have successfully routed the message to the plugin instance
                    # for this session
                    try:
                        session.receive_message(m.body)
                    except Exception:
                        # Deliberately not a bare except: SystemExit and
                        # KeyboardInterrupt must not be swallowed here.
                        daemon_log.error("%s/%s raised an exception: %s" %
                                         (m.plugin_name, m.session_id,
                                          traceback.format_exc()))
                        self._client.sessions.terminate(m.plugin_name)
            else:
                raise NotImplementedError(m.type)
        except Exception:
            backtrace = "\n".join(
                traceback.format_exception(*(sys.exc_info())))
            daemon_log.error("Plugin exception handling data message: %s" %
                             backtrace)
def full_scan(self, block_devices):
    """
    Scan every zpool returned by get_zpools() (first the default listing,
    then those from the active=False listing that were not already seen)
    and record its state.

    Pools that ZfsDevice reports as available are queried with
    ``zpool list``; pools in state 'UNAVAIL' are restored from the store;
    anything else is logged as inaccessible.  An OSError anywhere in the
    scan resets the zpool, dataset and zvol maps to empty.

    :param block_devices: handed on unchanged to the helpers that record
                          the pool information
    :return: None
    """
    try:
        zpools = list(get_zpools())
        active_pool_names = [entry['pool'] for entry in zpools]
        zpools += [entry for entry in get_zpools(active=False)
                   if entry['pool'] not in active_pool_names]

        for pool in zpools:
            with ZfsDevice(pool['pool'], True) as zfs_device:
                if zfs_device.available:
                    listing = AgentShell.try_run(
                        ["zpool", "list", "-H", "-o", "name,size,guid", pool['pool']])
                    self._add_zfs_pool(listing, block_devices)
                elif pool['state'] != 'UNAVAIL':
                    daemon_log.error(
                        "ZfsPool could not be accessed, reported info: %s" % pool)
                else:
                    # zpool probably imported elsewhere, attempt to read from store, this should return
                    # previously seen zpool state either with or without datasets
                    try:
                        stored = read_from_store(pool['id'])
                    except KeyError as e:
                        daemon_log.error(
                            "ZfsPool unavailable and could not be retrieved from store: %s ("
                            "pool: %s)" % (e, pool['pool']))
                    else:
                        # populate self._pools/datasets/zvols info from saved data read from store
                        self._update_pool_or_datasets(block_devices,
                                                      stored['pool'],
                                                      stored['datasets'],
                                                      stored['zvols'])
    except OSError:
        # OSError occurs when ZFS is not installed.
        self._zpools = {}
        self._datasets = {}
        self._zvols = {}
def request(self, method, **kwargs):
    """
    Issue an HTTP request with a JSON content type to self.url.

    The client certificate and key from self._crypto are attached when a
    certificate file is set.

    :param method: HTTP method name passed to requests.request
    :param kwargs: extra keyword arguments forwarded to requests.request
    :return: the decoded JSON body, or None if the body is not valid JSON
    :raises HttpError: on connection failure, on an unexpected error from
                       requests (outside debug mode) or on a non-OK status
    """
    crypto = self._crypto
    cert = crypto.certificate_file
    key = crypto.private_key_file
    if cert:
        kwargs["cert"] = (cert, key)

    connection_failures = (
        socket.error,
        requests.exceptions.ConnectionError,
        requests.exceptions.ReadTimeout,
        requests.exceptions.SSLError,
    )
    json_headers = {"Content-Type": "application/json"}

    try:
        # FIXME: set verify to true if we have a CA bundle
        response = requests.request(method,
                                    self.url,
                                    verify=False,
                                    headers=json_headers,
                                    **kwargs)
    except connection_failures as e:
        daemon_log.error("Error connecting to %s: %s" % (self.url, e))
        raise HttpError()
    except Exception as e:
        # If debugging is enabled meaning we are in test for example then raise the error again and the app
        # will crash. If debugging not enabled then this is a user scenario and it is better that we attempt
        # to carry on. No data will be transferred and so badness cannot happen.
        daemon_log.error("requests returned an unexpected error %s" % e)
        if not logging_in_debug_mode:
            raise HttpError()
        raise

    if not response.ok:
        status = response.status_code
        daemon_log.error("Bad status %s from %s to %s" %
                         (status, method, self.url))
        if status == 413:
            # Dump the request arguments to help diagnose what was too big.
            daemon_log.error("Oversized request: %s" %
                             json.dumps(kwargs, indent=2))
        raise HttpError()

    try:
        payload = response.json()
    except ValueError:
        payload = None
    return payload
console_log_setup() daemon_log.info("Starting in the background") else: context = None daemon_log_setup() daemon_log.addHandler(logging.StreamHandler()) console_log_setup() try: daemon_log.info("Entering main loop") try: conf = config.get('settings', 'server') except (KeyError, TypeError) as e: daemon_log.error( "No configuration found (must be registered before running the agent service), " "details: %s" % e) return if config.profile_managed is False: # This is kind of terrible. The design of DevicePluginManager is # such that it can be called with either class methods or # instantiated and then called with instance methods. As such, # we can't pass in a list of excluded plugins to the instance # constructor. Well, we could, but it would only work some # of the time and that would be even more awful. import chroma_agent.plugin_manager chroma_agent.plugin_manager.EXCLUDED_PLUGINS += ['corosync'] agent_client = AgentClient(conf['url'] + "message/", ActionPluginManager(),