def get_node_metadata(conn, hostname, cluster_nodes):
    """
    Gather the metadata sections for one node and return them as a dict.

    The returned dictionary has the keys ``paths``, ``network``, ``devices``
    and ``ceph`` (the latter also carrying a ``sockets`` entry). A progress
    line is written to the loader before and after each section: yellow
    while the section is being collected, green once it is done.

    :param conn: connection to the remote node
    :param hostname: name shown in the progress lines
    :param cluster_nodes: all cluster nodes, handed to the network collector
    """

    def progress(colorize, section):
        # single place that renders the per-section progress line
        loader.write(
            'Host: %-*s collecting: [%s]' % (20, hostname, colorize(section))
        )

    # "import" the remote functions so that remote calls using the
    # functions can be executed
    conn.import_module(remote.functions)

    collected = {'ceph': {}}

    # paths and files go first
    progress(terminal.yellow, 'paths')
    collected['paths'] = collect_paths(conn)
    progress(terminal.green, 'paths')

    # TODO: collect network information, passing all the cluster_nodes
    # so that it can check for inter-node connectivity
    progress(terminal.yellow, 'network')
    collected['network'] = collect_network(cluster_nodes)
    progress(terminal.green, 'network')

    # TODO: collect device information
    progress(terminal.yellow, 'devices')
    collected['devices'] = collect_devices()
    progress(terminal.green, 'devices')

    # ceph information; the socket collector receives the metadata gathered
    # so far, so it has to run after the 'ceph' key has been filled in
    progress(terminal.yellow, 'ceph information')
    collected['ceph'] = collect_ceph_info(conn)
    collected['ceph']['sockets'] = collect_socket_info(conn, collected)
    progress(terminal.green, 'ceph information')

    return collected
def help(self, sub_help=None):
    """
    Render the help text by filling in the ``self._help`` template.

    When ``self.hosts_file`` is ``None`` the hosts-file header is coloured
    red and a "not found" explanation is used in place of the path;
    otherwise the header is green and the loaded path is shown.

    :param sub_help: text substituted for the ``sub_help`` template field
    """
    hosts_file_loaded = self.hosts_file is not None

    if hosts_file_loaded:
        header = terminal.green('Loaded Inventory Hosts file')
        hosts_file_text = self.hosts_file
    else:
        header = terminal.red('Loaded Inventory Hosts file')
        hosts_file_text = 'No hosts file found in cwd, /etc/ansible/, or configured'

    template_fields = dict(
        version=ceph_medic.__version__,
        config_path=self.config_path,
        config_path_header=terminal.green('Loaded Config Path'),
        hosts_file=hosts_file_text,
        hosts_file_header=header,
        sub_help=sub_help,
        configured_nodes=self.configured_nodes,
    )
    return self._help.format(**template_fields)
def report(results):
    """
    Write the one-line summary of a run to the terminal.

    The line is red when there are errors, yellow when there are only
    warnings and green otherwise. Error, warning, skipped and internal
    error counts are left out when they are falsy. When internal errors
    were recorded, ``run_errors`` (formatted with their count) is written
    afterwards.

    :param results: object exposing ``passed``, ``errors``, ``warnings``,
                    ``skipped``, ``internal_errors`` and ``total_hosts``
    """
    # choose the colour from the most severe outcome
    if results.errors:
        colorize = terminal.red
    elif results.warnings:
        colorize = terminal.yellow
    else:
        colorize = terminal.green
    template = colorize(
        "\n{passed}{error}{warning}{skipped}{internal_errors}{hosts}"
    )

    error_text = ''
    if results.errors:
        if results.errors > 1:
            error_text = '%s errors, ' % results.errors
        else:
            error_text = '1 error, '

    warning_text = ''
    if results.warnings:
        if results.warnings > 1:
            warning_text = '%s warnings, ' % results.warnings
        else:
            warning_text = '1 warning, '

    internal_text = ''
    if results.internal_errors:
        internal_text = "%s internal errors, " % len(results.internal_errors)

    skipped_text = ''
    if results.skipped:
        skipped_text = "%s skipped, " % results.skipped

    summary = template.format(
        passed="%s passed, " % results.passed,
        error=error_text,
        warning=warning_text,
        skipped=skipped_text,
        internal_errors=internal_text,
        hosts="on %s hosts" % results.total_hosts,
    )
    terminal.write.raw(summary)

    if results.internal_errors:
        terminal.write.raw(run_errors % len(results.internal_errors))
def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that
    checks can consume and verify that data.

    After collection is done, the full contents of the metadata are
    available at ``ceph_medic.metadata``

    Nodes whose host group has no entry in ``metadata`` are skipped with a
    warning and are not counted. Nodes that fail to connect
    (``HostNotFound``) are counted as failed and skipped.

    :raises RuntimeError: when the number of failed nodes equals the number
                          of attempted nodes (this includes the case where
                          no node was attempted at all)
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0

    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(
                    node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-20s connection: [%-20s]' % (hostname, terminal.yellow('connecting')))

            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', node['host'])
                conn = get_connection(node['host'])
                loader.write('Host: %-20s connection: [%-20s]' % (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound:
                logger.exception('connection failed')
                loader.write('Host: %-20s connection: [%-20s]' % (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                continue

            # send the full node metadata for global scope so that the checks
            # can consume this
            try:
                metadata[node_type][node['host']] = get_node_metadata(
                    conn, hostname, cluster_nodes)
            finally:
                # always release the connection, even when collecting the
                # node metadata raises, so that it is not left open
                conn.exit()

    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' * 70 + '\n')
        raise RuntimeError(
            'All nodes failed to connect. Cannot run any checks')
    else:
        loader.write('Collection completed!' + ' ' * 70 + '\n')
def run_cluster(self, module):
    """
    Run every cluster-wide check found in ``module`` and print the outcome.

    Each check is called without arguments. A truthy return value is
    unpacked as ``(code, message)``: codes listed in ``self.ignore`` bump
    ``self.skipped``; codes starting with ``E`` or ``W`` bump
    ``self.errors`` or ``self.warnings`` and are printed in red or yellow.
    A falsy return value bumps ``self.passed``. A check that raises is
    logged, its exception is appended to ``self.internal_errors``, and it
    is then treated like a falsy result.

    :param module: module object holding the check callables
    """
    # XXX get the cluster name here
    label = '%s cluster' % metadata.get('cluster_name', 'ceph')
    terminal.loader.write(' %s' % terminal.yellow(label))

    reported_any = False
    for check_name in collect_checks(module):
        outcome = None
        try:
            # TODO: figure out how to skip running a specific check if
            # the code is ignored, maybe introspecting the function?
            outcome = getattr(module, check_name)()
        except Exception as error:
            logger.exception('check had an unhandled error: %s', check_name)
            self.internal_errors.append(error)

        if not outcome:
            self.passed += 1
            continue

        code, message = outcome

        # XXX This is not ideal, we shouldn't need to get all the way here
        # to make sure this is actually ignored. (Or maybe it doesn't matter?)
        if code in self.ignore:
            # nothing gets written to the terminal for ignored codes
            self.skipped += 1
            continue

        if not reported_any:
            # first reported item: repaint the cluster label in red
            # XXX get the cluster name here
            terminal.loader.write(' %s' % terminal.red(label))
            terminal.write.write('\n')

        shown_code = code
        if code.startswith('E'):
            shown_code = terminal.red(code)
            self.errors += 1
        elif code.startswith('W'):
            shown_code = terminal.yellow(code)
            self.warnings += 1

        terminal.write.write(" %s: %s\n" % (shown_code, message))
        reported_any = True

    if not reported_any:
        terminal.loader.write(' %s\n' % terminal.green(label))
def report(results):
    """
    Write the one-line summary of a run to the terminal.

    The line is red when there are failures, yellow when there are only
    errors and green otherwise. Failed, skipped and error counts are left
    out when they are falsy. When errors were recorded, ``run_errors`` is
    written afterwards.

    :param results: object exposing ``passed``, ``failed``, ``skipped``,
                    ``errors`` and ``total_hosts``
    """
    if results.failed:
        colorize = terminal.red
    elif results.errors:
        colorize = terminal.yellow
    else:
        colorize = terminal.green
    template = colorize("\n{passed}{failed}{skipped}{errors}{hosts}")

    failed_text = ''
    if results.failed:
        failed_text = "%s failed, " % results.failed

    skipped_text = ''
    if results.skipped:
        skipped_text = "%s skipped, " % results.skipped

    errors_text = ''
    if results.errors:
        errors_text = "%s errors, " % len(results.errors)

    summary = template.format(
        passed="%s passed, " % results.passed,
        failed=failed_text,
        skipped=skipped_text,
        errors=errors_text,
        hosts="on %s hosts" % results.total_hosts,
    )
    terminal.write.raw(summary)

    if results.errors:
        terminal.write.raw(run_errors)
def run_host(self, host, data, modules):
    """
    Run every check from ``modules`` against one host and print the outcome.

    Each check is called as ``check(host, data)``. A truthy return value is
    unpacked as ``(code, message)``: codes listed in ``self.ignore`` bump
    ``self.skipped``; any other code bumps ``self.failed`` and is printed
    (red for codes starting with ``E``, yellow for ``W``). A falsy return
    value bumps ``self.passed``. A check that raises is logged and its
    exception is appended to ``self.errors``.

    :param host: host name, printed and passed to every check
    :param data: metadata passed to every check
    :param modules: iterable of modules holding the check callables
    """
    terminal.loader.write(' %s' % terminal.yellow(host))
    has_error = False
    for module in modules:
        checks = collect_checks(module)
        for check in checks:
            try:
                # TODO: figure out how to skip running a specific check if
                # the code is ignored, maybe introspecting the function?
                result = getattr(module, check)(host, data)
            except Exception as error:
                # Reset the result: without this, ``result`` is unbound when
                # the very first check raises, and otherwise still holds the
                # previous check's value, which would be reported again.
                # NOTE: a check that raised therefore goes through the falsy
                # branch below (counted in ``self.passed``) in addition to
                # being recorded in ``self.errors``.
                result = None
                logger.exception('check had an unhandled error: %s', check)
                self.errors.append(error)
            if result:
                code, message = result
                # XXX This is not ideal, we shouldn't need to get all the way here
                # to make sure this is actually ignored. (Or maybe it doesn't matter?)
                if code in self.ignore:
                    self.skipped += 1
                    # avoid writing anything else to the terminal, and just
                    # go to the next check
                    continue
                self.failed += 1
                if not has_error:
                    # first reported item: repaint the host name in red
                    terminal.loader.write(' %s' % terminal.red(host))
                    terminal.write.write('\n')
                if code.startswith('E'):
                    code = terminal.red(code)
                elif code.startswith('W'):
                    code = terminal.yellow(code)
                terminal.write.write(" %s: %s" % (code, message))
                has_error = True
            else:
                self.passed += 1
    if not has_error:
        terminal.loader.write(' %s\n' % terminal.green(host))
def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that
    checks can consume and verify that data.

    After collection is done, the full contents of the metadata are
    available at ``ceph_medic.metadata``

    Nodes whose host group has no entry in ``metadata`` are skipped with a
    warning and are not counted. Nodes that fail to connect
    (``HostNotFound``) are counted as failed, removed from
    ``metadata[node_type]`` and ``metadata['nodes'][node_type]``, and
    recorded in ``metadata['failed_nodes']`` with the error text. Cluster
    data is collected through ``mons`` nodes until one of them returns a
    truthy result, which is stored in ``metadata['cluster']``.

    :raises RuntimeError: when the number of failed nodes equals the number
                          of attempted nodes (this includes the case where
                          no node was attempted at all)
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0
    has_cluster_data = False

    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(
                    node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.yellow('connecting')))

            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', node['host'])
                conn = get_connection(node['host'], container=node.get('container'))
                loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound as err:
                logger.exception('connection failed')
                loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                if metadata[node_type].get(hostname):
                    metadata[node_type].pop(hostname)
                # rebinding to a new list leaves the list being iterated
                # (``nodes``) untouched
                metadata['nodes'][node_type] = [
                    i for i in metadata['nodes'][node_type]
                    if i['host'] != hostname
                ]
                metadata['failed_nodes'].update({hostname: str(err)})
                continue

            try:
                # send the full node metadata for global scope so that the
                # checks can consume this
                metadata[node_type][hostname] = get_node_metadata(
                    conn, hostname, cluster_nodes)
                if node_type == 'mons':
                    # if node type is monitor, admin privileges are most
                    # likely authorized
                    if not has_cluster_data:
                        cluster_data = collect_cluster(conn)
                        if cluster_data:
                            metadata['cluster'] = cluster_data
                            has_cluster_data = True
            finally:
                # always release the connection, even when one of the
                # collectors raises, so that it is not left open
                conn.exit()

    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' * 70)
        # TODO: this helps clear out the 'loader' line so that the error looks
        # clean, but this manual clearing should be done automatically
        terminal.write.raw('')
        raise RuntimeError(
            'All nodes failed to connect. Cannot run any checks')

    if failed_nodes:
        loader.write(
            terminal.yellow(
                'Collection completed with some failed connections'
                + ' ' * 70 + '\n'))
    else:
        loader.write('Collection completed!' + ' ' * 70 + '\n')