コード例 #1
0
ファイル: collector.py プロジェクト: shreekarSS/ceph-medic
def get_node_metadata(conn, hostname, cluster_nodes):
    """
    Build the metadata dictionary for one remote node.

    Each section (paths, network, devices, ceph information) is collected in
    turn. A progress line is written through ``loader`` before a section
    starts (yellow label) and again once it has finished (green label).

    :param conn: connection to the remote host; ``remote.functions`` is
                 imported into it and it is handed to the collectors
    :param hostname: name shown in the progress lines
    :param cluster_nodes: forwarded as-is to ``collect_network``
    :returns: a dict with the keys ``paths``, ``network``, ``devices`` and
              ``ceph``; the socket information is stored under
              ``ceph['sockets']``
    """
    def progress(colorize, section):
        # a single progress line, the section label coloured by ``colorize``
        loader.write('Host: %-*s  collecting: [%s]' % (20, hostname, colorize(section)))

    # make the remote functions available on the other end so that remote
    # calls relying on them can be executed
    conn.import_module(remote.functions)

    collected = {'ceph': {}}

    # paths and files go first
    progress(terminal.yellow, 'paths')
    collected['paths'] = collect_paths(conn)
    progress(terminal.green, 'paths')

    # TODO: collect network information, passing all the cluster_nodes
    # so that it can check for inter-node connectivity
    progress(terminal.yellow, 'network')
    collected['network'] = collect_network(cluster_nodes)
    progress(terminal.green, 'network')

    # TODO: collect device information
    progress(terminal.yellow, 'devices')
    collected['devices'] = collect_devices()
    progress(terminal.green, 'devices')

    # ceph information; the socket collector receives everything gathered so
    # far, including the freshly stored 'ceph' entry
    progress(terminal.yellow, 'ceph information')
    collected['ceph'] = collect_ceph_info(conn)
    collected['ceph']['sockets'] = collect_socket_info(conn, collected)
    progress(terminal.green, 'ceph information')

    return collected
コード例 #2
0
def report(results):
    """
    Write the final summary line for a run.

    The line is coloured red when there are errors, yellow when there are
    only warnings and green otherwise. Fragments for errors, warnings,
    skipped checks and internal errors are left out when their count is
    falsy. When internal errors were recorded, ``run_errors`` (formatted with
    their count) is written afterwards.

    :param results: object exposing ``passed``, ``errors``, ``warnings``,
                    ``skipped``, ``internal_errors`` (a sized collection) and
                    ``total_hosts``
    """
    template = "\n{passed}{error}{warning}{skipped}{internal_errors}{hosts}"

    # the worst outcome decides the colour of the whole line
    if results.errors:
        colorize = terminal.red
    elif results.warnings:
        colorize = terminal.yellow
    else:
        colorize = terminal.green
    template = colorize(template)

    error_part = ''
    if results.errors:
        if results.errors > 1:
            error_part = '%s errors, ' % results.errors
        else:
            error_part = '1 error, '

    warning_part = ''
    if results.warnings:
        if results.warnings > 1:
            warning_part = '%s warnings, ' % results.warnings
        else:
            warning_part = '1 warning, '

    internal_part = ''
    if results.internal_errors:
        internal_part = "%s internal errors, " % len(results.internal_errors)

    passed_part = "%s passed, " % results.passed

    skipped_part = ''
    if results.skipped:
        skipped_part = "%s skipped, " % results.skipped

    hosts_part = "on %s hosts" % results.total_hosts

    summary = template.format(
        passed=passed_part,
        error=error_part,
        warning=warning_part,
        skipped=skipped_part,
        internal_errors=internal_part,
        hosts=hosts_part,
    )
    terminal.write.raw(summary)

    if results.internal_errors:
        terminal.write.raw(run_errors % len(results.internal_errors))
コード例 #3
0
ファイル: collector.py プロジェクト: liuchang0812/ceph-medic
def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that checks
    can consume and verify that data.

    After collection is done, the full contents of the metadata are available
    at ``ceph_medic.metadata``

    Nodes are read from ``metadata['nodes']``. A node whose group has no
    matching key in ``metadata`` is skipped with a warning and is not counted.
    A node that cannot be connected to (``HostNotFound``) is counted as
    failed and skipped.

    :raises RuntimeError: when the number of failed nodes equals the number
                          of counted nodes (this includes the case where no
                          node was counted at all)
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0
    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(
                    node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-20s  connection: [%-20s]' %
                         (hostname, terminal.yellow('connecting')))
            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', hostname)
                conn = get_connection(hostname)
                loader.write('Host: %-20s  connection: [%-20s]' %
                             (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound:
                logger.exception('connection failed')
                loader.write('Host: %-20s  connection: [%-20s]' %
                             (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                continue

            # send the full node metadata for global scope so that the checks
            # can consume this
            try:
                metadata[node_type][hostname] = get_node_metadata(
                    conn, hostname, cluster_nodes)
            finally:
                # always release the connection, even when collecting the
                # node metadata raised, so that it is not leaked
                conn.exit()
    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' * 70 + '\n')
        raise RuntimeError(
            'All nodes failed to connect. Cannot run any checks')
    else:
        loader.write('Collection completed!' + ' ' * 70 + '\n')
コード例 #4
0
    def run_cluster(self, module):
        """
        Run every check that ``collect_checks`` finds in ``module`` against
        the cluster and write the outcome to the terminal.

        A check is called without arguments. A truthy return value is
        unpacked as ``(code, message)``: codes listed in ``self.ignore`` only
        bump ``self.skipped``; otherwise the message is printed and, for codes
        starting with ``E`` or ``W``, ``self.errors`` or ``self.warnings`` is
        incremented. A falsy return value increments ``self.passed``.

        A check that raises is logged and its exception appended to
        ``self.internal_errors``; its result is treated as ``None``, so it is
        also counted in ``self.passed``.

        :param module: object holding the check callables as attributes
        """
        # XXX get the cluster name here
        label = '%s cluster' % metadata.get('cluster_name', 'ceph')
        terminal.loader.write(' %s' % terminal.yellow(label))
        reported = False

        for check_name in collect_checks(module):
            outcome = None
            try:
                # TODO: figure out how to skip running a specific check if
                # the code is ignored, maybe introspecting the function?
                outcome = getattr(module, check_name)()
            except Exception as error:
                logger.exception('check had an unhandled error: %s', check_name)
                self.internal_errors.append(error)

            if not outcome:
                self.passed += 1
                continue

            code, message = outcome
            # XXX This is not ideal, we shouldn't need to get all the way here
            # to make sure this is actually ignored. (Or maybe it doesn't matter?)
            if code in self.ignore:
                # nothing else goes to the terminal for an ignored code
                self.skipped += 1
                continue

            if not reported:
                # first reported issue: redraw the cluster name in red
                # XXX get the cluster name here
                terminal.loader.write(' %s' % terminal.red(label))
                terminal.write.write('\n')

            if code.startswith('E'):
                code = terminal.red(code)
                self.errors += 1
            elif code.startswith('W'):
                code = terminal.yellow(code)
                self.warnings += 1
            terminal.write.write("   %s: %s\n" % (code, message))
            reported = True

        if not reported:
            terminal.loader.write(' %s\n' % terminal.green(label))
コード例 #5
0
ファイル: runner.py プロジェクト: liuchang0812/ceph-medic
    def run_host(self, host, data, modules):
        """
        Run every check found by ``collect_checks`` in each of ``modules``
        against a single host and write the outcome to the terminal.

        Each check is called as ``check(host, data)``. A truthy return value
        is unpacked as ``(code, message)``: codes listed in ``self.ignore``
        only bump ``self.skipped``; any other code increments ``self.failed``
        and its message is printed. A falsy return value increments
        ``self.passed``.

        A check that raises is logged and its exception appended to
        ``self.errors``; its result is then treated as ``None`` (so it is
        counted in ``self.passed``, the same way ``run_cluster`` treats it).

        :param host: host name, shown in the terminal and passed to the checks
        :param data: passed unchanged as second argument to every check
        :param modules: iterable of objects holding the check callables
        """
        terminal.loader.write(' %s' % terminal.yellow(host))
        has_error = False
        for module in modules:
            checks = collect_checks(module)
            for check in checks:
                try:
                    # TODO: figure out how to skip running a specific check if
                    # the code is ignored, maybe introspecting the function?
                    result = getattr(module, check)(host, data)
                except Exception as error:
                    # reset the result: otherwise the name is unbound when
                    # the very first check raises, or still holds the
                    # previous check's result, which would be reported again
                    result = None
                    logger.exception('check had an unhandled error: %s', check)
                    self.errors.append(error)
                if result:
                    code, message = result
                    # XXX This is not ideal, we shouldn't need to get all the way here
                    # to make sure this is actually ignored. (Or maybe it doesn't matter?)
                    if code in self.ignore:
                        self.skipped += 1
                        # avoid writing anything else to the terminal, and just
                        # go to the next check
                        continue
                    self.failed += 1
                    if not has_error:
                        # first reported issue: redraw the host name in red
                        terminal.loader.write(' %s' % terminal.red(host))
                    # every message starts on its own line
                    terminal.write.write('\n')

                    if code.startswith('E'):
                        code = terminal.red(code)
                    elif code.startswith('W'):
                        code = terminal.yellow(code)
                    terminal.write.write("   %s: %s" % (code, message))
                    has_error = True
                else:
                    self.passed += 1

        if not has_error:
            terminal.loader.write(' %s\n' % terminal.green(host))
コード例 #6
0
ファイル: runner.py プロジェクト: shreekarSS/ceph-medic
def report(results):
    """
    Write the final summary line for a run.

    The line is coloured red when checks failed, yellow when there are only
    unhandled errors and green otherwise. The fragments for failed, skipped
    and errored checks are left out when their value is falsy. When errors
    were recorded, ``run_errors`` is written afterwards.

    :param results: object exposing ``passed``, ``failed``, ``skipped``,
                    ``errors`` (a sized collection) and ``total_hosts``
    """
    template = "\n{passed}{failed}{skipped}{errors}{hosts}"

    # the worst outcome decides the colour of the whole line
    if results.failed:
        colorize = terminal.red
    elif results.errors:
        colorize = terminal.yellow
    else:
        colorize = terminal.green
    template = colorize(template)

    passed_part = "%s passed, " % results.passed

    failed_part = ''
    if results.failed:
        failed_part = "%s failed, " % results.failed

    skipped_part = ''
    if results.skipped:
        skipped_part = "%s skipped, " % results.skipped

    errors_part = ''
    if results.errors:
        errors_part = "%s errors, " % len(results.errors)

    hosts_part = "on %s hosts" % results.total_hosts

    summary = template.format(
        passed=passed_part,
        failed=failed_part,
        skipped=skipped_part,
        errors=errors_part,
        hosts=hosts_part,
    )
    terminal.write.raw(summary)

    if results.errors:
        terminal.write.raw(run_errors)
コード例 #7
0
ファイル: runner.py プロジェクト: shreekarSS/ceph-medic
                    if code.startswith('E'):
                        code = terminal.red(code)
                    elif code.startswith('W'):
                        code = terminal.yellow(code)
                    terminal.write.write("   %s: %s\n" % (code, message))
                    has_error = True
                else:
                    self.passed += 1

        if not has_error:
            terminal.loader.write(' %s\n' % terminal.green(host))


# Notice telling the user that checks raised unhandled errors and pointing
# them to the log file. The text is passed through ``terminal.yellow`` once,
# when this module-level assignment runs.
run_errors = terminal.yellow("""
While running checks, ceph-medic had unhandled errors, please look at the
configured log file and report the issue along with the traceback.
""")


def report(results):
    msg = "\n{passed}{failed}{skipped}{errors}{hosts}"

    if results.failed:
        msg = terminal.red(msg)
    elif results.errors:
        msg = terminal.yellow(msg)
    else:
        msg = terminal.green(msg)

    terminal.write.raw(
        msg.format(passed="%s passed, " % results.passed,
コード例 #8
0
def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that checks
    can consume and verify that data.

    After collection is done, the full contents of the metadata are available
    at ``ceph_medic.metadata``

    Nodes are read from ``metadata['nodes']``. A node whose group has no
    matching key in ``metadata`` is skipped with a warning and is not counted.
    A node that cannot be connected to (``HostNotFound``) is counted as
    failed, removed from ``metadata['nodes']`` and recorded in
    ``metadata['failed_nodes']`` together with the error text. Cluster-wide
    data is collected through nodes of type ``mons`` and stored in
    ``metadata['cluster']`` once a truthy result is obtained.

    :raises RuntimeError: when the number of failed nodes equals the number
                          of counted nodes (this includes the case where no
                          node was counted at all)
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0
    has_cluster_data = False

    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(
                    node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-40s  connection: [%-20s]' %
                         (hostname, terminal.yellow('connecting')))
            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', hostname)
                conn = get_connection(hostname,
                                      container=node.get('container'))
                loader.write('Host: %-40s  connection: [%-20s]' %
                             (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound as err:
                logger.exception('connection failed')
                loader.write('Host: %-40s  connection: [%-20s]' %
                             (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                # drop anything already stored for the unreachable host
                if metadata[node_type].get(hostname):
                    metadata[node_type].pop(hostname)
                # a new list is bound to the group here; the ``nodes`` list
                # being iterated is a separate object and stays untouched
                metadata['nodes'][node_type] = [
                    i for i in metadata['nodes'][node_type]
                    if i['host'] != hostname
                ]
                metadata['failed_nodes'].update({hostname: str(err)})
                continue

            try:
                # send the full node metadata for global scope so that the
                # checks can consume this
                metadata[node_type][hostname] = get_node_metadata(
                    conn, hostname, cluster_nodes)
                if node_type == 'mons':  # if node type is monitor, admin privileges are most likely authorized
                    # keep asking monitors until one returns cluster data
                    if not has_cluster_data:
                        cluster_data = collect_cluster(conn)
                    if cluster_data:
                        metadata['cluster'] = cluster_data
                        has_cluster_data = True
            finally:
                # always release the connection, even when one of the
                # collectors raised, so that it is not leaked
                conn.exit()

    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' * 70)
        # TODO: this helps clear out the 'loader' line so that the error looks
        # clean, but this manual clearing should be done automatically
        terminal.write.raw('')
        raise RuntimeError(
            'All nodes failed to connect. Cannot run any checks')
    if failed_nodes:
        loader.write(
            terminal.yellow(
                'Collection completed with some failed connections' +
                ' ' * 70 + '\n'))
    else:
        loader.write('Collection completed!' + ' ' * 70 + '\n')