Esempio n. 1
0
    def main(self):
        """
        Build a host inventory from the monitor report of a remote node.

        The last item of ``self.argv`` is used as the node to connect to. The
        monitor report fetched from that node provides the monitor names
        (``report['monmap']['mons']``) and the OSD hostnames
        (``report['osd_metadata']``), which are passed on to
        ``generate_inventory``. The ``--stdout`` option is forwarded as the
        ``to_stdout`` argument.

        Raises ``SystemExit`` when no argument was given, when the report has
        no monitor map (the report itself is used as the exit message), or
        when the report lists neither monitors nor OSDs.
        """
        options = ['--stdout']
        parser = Transport(self.argv, options=options, check_version=False)
        parser.catch_help = self._help()

        parser.parse_args()

        # argv only holds the command name: nothing to connect to
        if len(self.argv) == 1:
            raise SystemExit(
                "A monitor hostname or a ceph.conf file is required as an argument"
            )

        node = self.argv[-1]
        inventory = {}

        with get_connection(node) as conn:
            report = get_mon_report(conn)
            try:
                mons = report['monmap']['mons']
            except KeyError:
                # without a monmap the report is not usable, surface it as-is
                raise SystemExit(report)
            inventory['mons'] = [i['name'] for i in mons]
            osds = report['osd_metadata']
            inventory['osds'] = [i['hostname'] for i in osds]

        # The 'mons' and 'osds' keys are always set at this point, so the
        # dictionary itself is never empty; inspect the host lists instead.
        if not any(inventory.values()):
            raise SystemExit(
                'no hosts where found from remote monitor node: %s' % node)

        generate_inventory(inventory, to_stdout=parser.get('--stdout'))
        # NOTE(review): the ``with`` block above may already close the
        # connection; the explicit exit is kept as-is -- confirm whether it is
        # still required.
        conn.exit()
        return
Esempio n. 2
0
def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that checks
    can consume and verify that data.

    Every node listed in ``metadata['nodes']`` is connected to, and the result
    of ``get_node_metadata`` is stored at ``metadata[node_type][hostname]``.
    Nodes that belong to a group that has no entry in ``metadata`` are skipped
    with a warning, and nodes that raise ``HostNotFound`` on connection are
    counted as failed.

    After collection is done, the full contents of the metadata are available
    at ``ceph_medic.metadata``

    Raises ``RuntimeError`` when the number of failed nodes equals the number
    of attempted nodes (which is also the case when no node was attempted).
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0
    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(
                    node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-20s  connection: [%-20s]' %
                         (hostname, terminal.yellow('connecting')))
            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', hostname)
                conn = get_connection(hostname)
                loader.write('Host: %-20s  connection: [%-20s]' %
                             (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound:
                logger.exception('connection failed')
                loader.write('Host: %-20s  connection: [%-20s]' %
                             (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                continue

            # send the full node metadata for global scope so that the checks
            # can consume this
            try:
                metadata[node_type][hostname] = get_node_metadata(
                    conn, hostname, cluster_nodes)
            finally:
                # always release the connection, even when gathering the
                # metadata raises, so that it is not left open
                conn.exit()
    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' * 70 + '\n')
        raise RuntimeError(
            'All nodes failed to connect. Cannot run any checks')
    else:
        loader.write('Collection completed!' + ' ' * 70 + '\n')
Esempio n. 3
0
def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that checks
    can consume and verify that data.

    Every node listed in ``metadata['nodes']`` is connected to, and the result
    of ``get_node_metadata`` is stored at ``metadata[node_type][hostname]``.
    Nodes that belong to a group that has no entry in ``metadata`` are skipped
    with a warning. Nodes that raise ``HostNotFound`` on connection are removed
    from ``metadata['nodes']`` and recorded, together with the error message,
    in ``metadata['failed_nodes']``.

    Cluster wide data is requested through ``collect_cluster`` from nodes in
    the ``mons`` group until one of them returns a non-empty result, which is
    then stored at ``metadata['cluster']``.

    After collection is done, the full contents of the metadata are available
    at ``ceph_medic.metadata``

    Raises ``RuntimeError`` when the number of failed nodes equals the number
    of attempted nodes (which is also the case when no node was attempted).
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0
    has_cluster_data = False

    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(
                    node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-40s  connection: [%-20s]' %
                         (hostname, terminal.yellow('connecting')))
            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', hostname)
                conn = get_connection(hostname,
                                      container=node.get('container'))
                loader.write('Host: %-40s  connection: [%-20s]' %
                             (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound as err:
                logger.exception('connection failed')
                loader.write('Host: %-40s  connection: [%-20s]' %
                             (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                # drop the unreachable host from the collected metadata and
                # from the list of known nodes, and keep track of the reason
                if metadata[node_type].get(hostname):
                    metadata[node_type].pop(hostname)
                # a new list is bound here; the list being iterated over is
                # left untouched
                metadata['nodes'][node_type] = [
                    i for i in metadata['nodes'][node_type]
                    if i['host'] != hostname
                ]
                metadata['failed_nodes'].update({hostname: str(err)})
                continue

            try:
                # send the full node metadata for global scope so that the
                # checks can consume this
                metadata[node_type][hostname] = get_node_metadata(
                    conn, hostname, cluster_nodes)
                # if node type is monitor, admin privileges are most likely
                # authorized. Keep asking monitors until one of them provides
                # cluster data.
                if node_type == 'mons' and not has_cluster_data:
                    cluster_data = collect_cluster(conn)
                    if cluster_data:
                        metadata['cluster'] = cluster_data
                        has_cluster_data = True
            finally:
                # always release the connection, even when gathering the
                # metadata raises, so that it is not left open
                conn.exit()

    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' * 70)
        # TODO: this helps clear out the 'loader' line so that the error looks
        # clean, but this manual clearing should be done automatically
        terminal.write.raw('')
        raise RuntimeError(
            'All nodes failed to connect. Cannot run any checks')
    if failed_nodes:
        loader.write(
            terminal.yellow(
                'Collection completed with some failed connections' +
                ' ' * 70 + '\n'))
    else:
        loader.write('Collection completed!' + ' ' * 70 + '\n')