Exemple #1
0
def mon_status_check(conn, logger, hostname, args):
    """
    Query a monitor for its status through its admin socket and return the
    decoded JSON output.

    The command executed is ``ceph --cluster=<cluster> --admin-daemon <asok>
    mon_status``, where the socket path is resolved with ``paths.mon.asok``
    from ``args.cluster`` and ``hostname``. Every line the command writes to
    stderr is logged as an error on ``logger``.

    As noted by the original author, ``mon_status`` exists on newer versions
    of Ceph (dumpling and newer); this check depends on that availability and
    should be revisited if the output changes.

    Returns the parsed status, or an empty dict when stdout is not valid JSON.
    """
    command = [
        'ceph',
        '--cluster={cluster}'.format(cluster=args.cluster),
        '--admin-daemon',
        paths.mon.asok(args.cluster, hostname),
        'mon_status',
    ]
    stdout, stderr, _ = process.check(conn, command)

    for error_line in stderr:
        logger.error(error_line)

    try:
        status = json.loads(''.join(stdout))
    except ValueError:
        # Output that cannot be decoded is reported as "no status"
        status = {}
    return status
Exemple #2
0
def can_connect_passwordless(hostname):
    """
    Ensure that current host can SSH remotely to the remote
    host using the ``BatchMode`` option to prevent a password prompt.

    A failed attempt is recognized by an exit status of 255 together with a
    ``Permission denied`` message on stderr.

    Returns ``True`` when ``hostname`` does not need SSH at all (according to
    ``needs_ssh``) or when the failure signature above is not seen, ``False``
    otherwise.
    """
    # Ensure we are not doing this for local hosts
    if not needs_ssh(hostname):
        return True

    logger = logging.getLogger(hostname)
    with get_local_connection(logger) as conn:
        # Check to see if we can login, disabling password prompts
        command = ['ssh', '-CT', '-o', 'BatchMode=yes', hostname]
        _, err, retval = process.check(conn, command, stop_on_error=False)

        # Only match the message prefix: the parenthesized list of
        # authentication methods that follows depends on the server
        # configuration, so matching one exact list (such as
        # ``(publickey,password)``) misses every other combination.
        expected_error = 'Permission denied '
        has_key_error = any(expected_error in line for line in err)

        if retval == 255 and has_key_error:
            return False
    return True
Exemple #3
0
def is_running(conn, args):
    """
    Execute the status command given in ``args`` over ``conn`` and report
    whether its output says the daemon is running.

    This relies entirely on the format of the command output, so it needs to
    be updated if that format ever changes. Output for a running daemon looks
    like::

        mon.mira094: running {"version":"0.61.5"}

    while a daemon that is not running produces something like::

        mon.mira094: dead {"version":"0.61.5"}
        mon.mira094: not running {"version":"0.61.5"}

    Returns ``True`` if stdout contains ``": running"`` or
    ``" start/running"``, ``False`` otherwise.
    """
    stdout, _, _ = process.check(conn, args)
    status_text = ' '.join(stdout)
    running_markers = (': running', ' start/running')
    return any(marker in status_text for marker in running_markers)
Exemple #4
0
def can_connect_passwordless(hostname):
    """
    Tell whether this host can open an SSH session to ``hostname`` without
    being asked for a password.

    The probe runs ``ssh`` with ``BatchMode=yes`` so that no prompt is shown.
    A rejected login is detected by an exit status of 255 combined with a
    ``Permission denied`` message on stderr; only that combination yields
    ``False``. Hosts that do not need SSH (see ``needs_ssh``) always yield
    ``True``.
    """
    if needs_ssh(hostname):
        host_logger = logging.getLogger(hostname)
        with get_local_connection(host_logger) as conn:
            # Probe the login with password prompts disabled
            ssh_probe = ['ssh', '-CT', '-o', 'BatchMode=yes', hostname]
            _, stderr_lines, exit_status = process.check(
                conn,
                ssh_probe,
                stop_on_error=False,
            )
            denied_marker = 'Permission denied '
            denied = any(denied_marker in line for line in stderr_lines)

            if denied and exit_status == 255:
                return False
    return True
Exemple #5
0
def osd_status_check(conn, cluster):
    """
    Run ``ceph osd stat --format=json`` for ``cluster`` and return the decoded
    output as a dictionary.

    What good output would look like::

        {
            "epoch": 8,
            "num_osds": 1,
            "num_up_osds": 1,
            "num_in_osds": "1",
            "full": "false",
            "nearfull": "false"
        }

    The booleans are reported as strings, so every top-level value equal to
    ``"true"`` or ``"false"`` is replaced by the matching Python boolean
    before returning (Issue #8108). Any other value is left as it is.

    An empty dict is returned when the output is not valid JSON or when
    ``process.check`` fails with a ``TypeError``.
    """
    def as_boolean(value):
        # --format=json emits booleans as the strings 'true' / 'false'
        if value == 'true':
            return True
        if value == 'false':
            return False
        return value

    stat_command = [
        'ceph',
        '--cluster={cluster}'.format(cluster=cluster),
        'osd',
        'stat',
        '--format=json',
    ]

    try:
        stdout, _, _ = process.check(conn, stat_command)
    except TypeError:
        # XXX This is a bug in remoto. If the other end disconnects with a
        # timeout it will return a None, and unpacking a None instead of a
        # 3 item tuple breaks with a TypeError. Once remoto fixes this, this
        # try/except is no longer needed.
        return {}

    try:
        status = json.loads(''.join(stdout))
    except ValueError:
        return {}

    return dict((key, as_boolean(value)) for key, value in status.items())
Exemple #6
0
def osd_status_check(conn, cluster):
    """
    Collect the OSD status of ``cluster`` by running
    ``ceph osd stat --format=json`` over ``conn``.

    A healthy cluster reports something similar to::

        {
            "epoch": 8,
            "num_osds": 1,
            "num_up_osds": 1,
            "num_in_osds": "1",
            "full": "false",
            "nearfull": "false"
        }

    Booleans arrive as the strings ``"true"`` / ``"false"``; top-level values
    with exactly those contents are converted to real booleans before the
    dictionary is returned (Issue #8108).

    Returns ``{}`` if the command output cannot be decoded as JSON, or if
    ``process.check`` raises a ``TypeError``.
    """
    cluster_flag = '--cluster={cluster}'.format(cluster=cluster)

    try:
        result = process.check(
            conn,
            ['ceph', cluster_flag, 'osd', 'stat', '--format=json'],
        )
        out, _err, _code = result
    except TypeError:
        # XXX remoto bug: when the other end disconnects with a timeout, a
        # None comes back instead of the expected 3 item tuple, which breaks
        # with a TypeError. Drop this guard once remoto fixes it.
        return {}

    try:
        parsed = json.loads(''.join(out))
    except ValueError:
        return {}

    # convert boolean strings to actual booleans because --format=json fails
    # to do this properly
    for key, value in list(parsed.items()):
        if value == 'true':
            parsed[key] = True
        elif value == 'false':
            parsed[key] = False
    return parsed
Exemple #7
0
def osd_tree(conn, cluster):
    """
    Run ``ceph osd tree --format=json`` for ``cluster`` over ``conn`` and
    return the decoded output as a dictionary.

    Because ``--format=json`` may report booleans as strings (Issue #8108),
    every top-level value equal to ``"true"`` or ``"false"`` is replaced by
    the matching Python boolean. Nested values are not inspected.

    Returns ``{}`` when the command output is not valid JSON.
    """
    def as_boolean(value):
        # Only the exact strings 'true' / 'false' are converted
        if value == 'true':
            return True
        if value == 'false':
            return False
        return value

    tree_command = [
        'ceph',
        '--cluster={cluster}'.format(cluster=cluster),
        'osd',
        'tree',
        '--format=json',
    ]
    stdout, _, _ = process.check(conn, tree_command)

    try:
        tree = json.loads(''.join(stdout))
    except ValueError:
        return {}

    return dict((key, as_boolean(value)) for key, value in tree.items())
Exemple #8
0
def osd_tree(conn, cluster):
    """
    Fetch the OSD tree of ``cluster`` by running
    ``ceph osd tree --format=json`` over ``conn``.

    The JSON output is decoded into a dictionary. Top-level values that are
    exactly the strings ``"true"`` or ``"false"`` are turned into real
    booleans, since ``--format=json`` does not always emit them properly
    (Issue #8108); nothing below the top level is touched.

    Returns ``{}`` if the output cannot be decoded as JSON.
    """
    cluster_flag = '--cluster={cluster}'.format(cluster=cluster)
    out, _err, _code = process.check(
        conn,
        ['ceph', cluster_flag, 'osd', 'tree', '--format=json'],
    )

    try:
        parsed = json.loads(''.join(out))
    except ValueError:
        return {}

    # convert boolean strings to actual booleans
    for key, value in list(parsed.items()):
        if value == 'true':
            parsed[key] = True
        elif value == 'false':
            parsed[key] = False
    return parsed
Exemple #9
0
def is_running(conn, args):
    """
    Run the status command ``args`` over ``conn`` and return a boolean
    telling whether the daemon is reported as running.

    The decision is based purely on the text of the output, so any change in
    that format requires updating this function. A running daemon is reported
    similar to::

        mon.mira094: running {"version":"0.61.5"}

    and one that is not running similar to::

        mon.mira094: dead {"version":"0.61.5"}
        mon.mira094: not running {"version":"0.61.5"}

    ``True`` is returned when stdout contains ``": running"`` or
    ``" start/running"``.
    """
    out, _err, _code = process.check(conn, args)
    joined_output = ' '.join(out)

    if ': running' in joined_output:
        return True
    return ' start/running' in joined_output
Exemple #10
0
def osd_list(args, cfg):
    """
    Print information about the OSDs found on every host in ``args.disk``.

    The OSD tree is fetched once from the first monitor listed in
    ``mon_initial_members`` (``global`` section of the configuration loaded
    with ``conf.ceph.load(args)``). Then, for each ``(hostname, disk,
    journal)`` entry in ``args.disk``, every directory under
    ``constants.osd_path`` is inspected: its mount point is looked up in the
    output of ``ceph-disk list``, a few metadata files are read, the journal
    path is resolved and the matching node of the OSD tree (if any) is
    selected. The collected data is handed to ``print_osd``.

    Note that the ``cfg`` argument is ignored: the configuration is always
    reloaded from ``args``.

    Raises ``exc.NeedHostError`` when no monitor is defined in
    ``mon_initial_members``.
    """
    # FIXME: this portion should probably be abstracted. We do the same in
    # mon.py
    cfg = conf.ceph.load(args)
    mon_initial_members = cfg.safe_get('global', 'mon_initial_members')

    # re.split() always returns at least one item ('' for empty input) and
    # fails on a non-string, so a missing value (presumably None) has to be
    # handled before splitting, and empty fragments must be dropped for the
    # check below to be meaningful.
    monitors = [
        name for name in re.split(r'[,\s]+', mon_initial_members or '')
        if name
    ]

    if not monitors:
        raise exc.NeedHostError(
            'could not find `mon initial members` defined in ceph.conf')

    # get the osd tree from a monitor host
    mon_host = monitors[0]
    distro = hosts.get(mon_host, username=args.username)
    try:
        tree = osd_tree(distro.conn, args.cluster)
    finally:
        # always release the connection, even if the command blows up
        distro.conn.exit()

    # osd_tree() returns an empty dict when its output cannot be parsed, so
    # do not assume that the 'nodes' key is there
    tree_nodes = tree.get('nodes', [])

    interesting_files = ['active', 'magic', 'whoami', 'journal_uuid']

    for hostname, _disk, _journal in args.disk:
        distro = hosts.get(hostname, username=args.username)
        try:
            remote_module = distro.conn.remote_module
            osds = remote_module.listdir(constants.osd_path)

            output, _err, _exit_code = process.check(distro.conn, [
                'ceph-disk',
                'list',
            ])

            for _osd in osds:
                osd_path = os.path.join(constants.osd_path, _osd)
                journal_path = os.path.join(osd_path, 'journal')
                _id = int(_osd.split('-')[-1])  # split on dash, get the id
                osd_name = 'osd.%s' % _id
                metadata = {}
                json_blob = {}

                # piggy back from ceph-disk and get the mount point
                device = get_osd_mount_point(output, osd_name)
                if device:
                    metadata['device'] = device

                # read interesting metadata from files
                for f in interesting_files:
                    osd_f_path = os.path.join(osd_path, f)
                    if remote_module.path_exists(osd_f_path):
                        metadata[f] = remote_module.readline(osd_f_path)

                # do we have a journal path?
                if remote_module.path_exists(journal_path):
                    metadata['journal path'] = remote_module.get_realpath(
                        journal_path)

                # is this OSD in osd tree? (the last matching node wins)
                for blob in tree_nodes:
                    if blob.get('id') == _id:  # matches our OSD
                        json_blob = blob

                print_osd(
                    distro.conn.logger,
                    hostname,
                    osd_path,
                    json_blob,
                    metadata,
                )
        finally:
            distro.conn.exit()
Exemple #11
0
def osd_list(args, cfg):
    """
    Print information about the OSDs found on every host in ``args.disk``.

    The OSD tree is fetched once from the first monitor listed in
    ``mon_initial_members`` (``global`` section of the configuration loaded
    with ``conf.ceph.load(args)``). Then, for each ``(hostname, disk,
    journal)`` entry in ``args.disk``, every directory under
    ``constants.osd_path`` is inspected: its mount point is looked up in the
    output of ``ceph-disk list``, a few metadata files are read, the journal
    path is resolved and the matching node of the OSD tree (if any) is
    selected. The collected data is handed to ``print_osd``.

    Note that the ``cfg`` argument is ignored: the configuration is always
    reloaded from ``args``.

    Raises ``exc.NeedHostError`` when no monitor is defined in
    ``mon_initial_members``.
    """
    # FIXME: this portion should probably be abstracted. We do the same in
    # mon.py
    cfg = conf.ceph.load(args)
    mon_initial_members = cfg.safe_get('global', 'mon_initial_members')

    # re.split() always returns at least one item ('' for empty input) and
    # fails on a non-string, so a missing value (presumably None) has to be
    # handled before splitting, and empty fragments must be dropped for the
    # check below to be meaningful.
    monitors = [
        name for name in re.split(r'[,\s]+', mon_initial_members or '')
        if name
    ]

    if not monitors:
        raise exc.NeedHostError(
            'could not find `mon initial members` defined in ceph.conf'
        )

    # get the osd tree from a monitor host
    mon_host = monitors[0]
    distro = hosts.get(mon_host, username=args.username)
    try:
        tree = osd_tree(distro.conn, args.cluster)
    finally:
        # always release the connection, even if the command blows up
        distro.conn.exit()

    # osd_tree() returns an empty dict when its output cannot be parsed, so
    # do not assume that the 'nodes' key is there
    tree_nodes = tree.get('nodes', [])

    interesting_files = ['active', 'magic', 'whoami', 'journal_uuid']

    for hostname, _disk, _journal in args.disk:
        distro = hosts.get(hostname, username=args.username)
        try:
            remote_module = distro.conn.remote_module
            osds = remote_module.listdir(constants.osd_path)

            output, _err, _exit_code = process.check(
                distro.conn,
                [
                    'ceph-disk',
                    'list',
                ]
            )

            for _osd in osds:
                osd_path = os.path.join(constants.osd_path, _osd)
                journal_path = os.path.join(osd_path, 'journal')
                _id = int(_osd.split('-')[-1])  # split on dash, get the id
                osd_name = 'osd.%s' % _id
                metadata = {}
                json_blob = {}

                # piggy back from ceph-disk and get the mount point
                device = get_osd_mount_point(output, osd_name)
                if device:
                    metadata['device'] = device

                # read interesting metadata from files
                for f in interesting_files:
                    osd_f_path = os.path.join(osd_path, f)
                    if remote_module.path_exists(osd_f_path):
                        metadata[f] = remote_module.readline(osd_f_path)

                # do we have a journal path?
                if remote_module.path_exists(journal_path):
                    metadata['journal path'] = remote_module.get_realpath(
                        journal_path
                    )

                # is this OSD in osd tree? (the last matching node wins)
                for blob in tree_nodes:
                    if blob.get('id') == _id:  # matches our OSD
                        json_blob = blob

                print_osd(
                    distro.conn.logger,
                    hostname,
                    osd_path,
                    json_blob,
                    metadata,
                )
        finally:
            distro.conn.exit()
Exemple #12
0
def purgedata(args):
    """
    Remove the ceph data directory (``/var/lib/ceph``) and configuration
    directory (``/etc/ceph/``) from every host in ``args.host``.

    Nothing is removed if the ``ceph`` executable is still found on any of
    the hosts: the offending hosts are logged and a ``RuntimeError`` is
    raised instead.
    """
    def rm_rf(path):
        # A new list on every call, so no command list is shared between runs
        return ['rm', '-rf', '--one-file-system', '--', path]

    LOG.debug(
        'Purging data from cluster %s hosts %s',
        args.cluster,
        ' '.join(args.host),
    )

    # First pass: find out where ceph is still installed
    still_installed = []
    for host in args.host:
        remote = hosts.get(host, username=args.username)
        if remote.conn.remote_module.which('ceph'):
            still_installed.append(host)
        remote.conn.exit()

    if still_installed:
        LOG.error("ceph is still installed on: %s", still_installed)
        raise RuntimeError("refusing to purge data while ceph is still installed")

    # Second pass: actually remove the data
    for host in args.host:
        remote = hosts.get(host, username=args.username)
        LOG.info(
            'Distro info: %s %s %s',
            remote.name,
            remote.release,
            remote.codename
        )

        host_logger = logging.getLogger(host)
        host_logger.info('purging data on %s' % host)

        # Best effort removal first: failures are dealt with right below
        process.check(remote.conn, rm_rf('/var/lib/ceph'))

        # The directory surviving the removal probably means that OSDs are
        # still mounted, so unmount them and remove again, this time
        # checking for errors
        if remote.conn.remote_module.path_exists('/var/lib/ceph'):
            host_logger.warning(
                'OSDs may still be mounted, trying to unmount them'
            )
            unmount_command = [
                'find', '/var/lib/ceph',
                '-mindepth', '1',
                '-maxdepth', '2',
                '-type', 'd',
                '-exec', 'umount', '{}', ';',
            ]
            process.run(remote.conn, unmount_command)
            process.run(remote.conn, rm_rf('/var/lib/ceph'))

        process.run(remote.conn, rm_rf('/etc/ceph/'))

        remote.conn.exit()
Exemple #13
0
def create_mds(conn, name, cluster, init):
    """
    Create the data directory and keyring for the MDS ``name`` of ``cluster``
    on the remote end of ``conn`` and start the daemon.

    The keyring is requested with ``ceph auth get-or-create`` using the
    bootstrap-mds keyring. If that command exits with ``errno.EACCES`` the
    request is repeated with an alternative set of capabilities (``osd
    'allow *'``, ``mon 'allow rwx'``) -- presumably for monitors that do not
    accept ``allow profile mds``.

    After the keyring is in place, the ``done`` and ``init`` marker files are
    touched and the daemon is started through ``initctl`` (``init ==
    'upstart'``) or ``service`` (``init == 'sysvinit'``). Any other value of
    ``init`` does not start anything.

    Raises ``RuntimeError`` when the keyring cannot be created.
    """
    path = '/var/lib/ceph/mds/{cluster}-{name}'.format(cluster=cluster,
                                                       name=name)

    conn.remote_module.safe_mkdir(path)

    bootstrap_keyring = '/var/lib/ceph/bootstrap-mds/{cluster}.keyring'.format(
        cluster=cluster)

    keypath = os.path.join(path, 'keyring')

    def get_or_create_key(osd_caps, mon_caps):
        # Request the MDS key with the given capabilities, saved to keypath
        return process.check(conn, [
            'ceph',
            '--cluster',
            cluster,
            '--name',
            'client.bootstrap-mds',
            '--keyring',
            bootstrap_keyring,
            'auth',
            'get-or-create',
            'mds.{name}'.format(name=name),
            'osd',
            osd_caps,
            'mds',
            'allow',
            'mon',
            mon_caps,
            '-o',
            keypath,
        ])

    def fail(stdout, stderr, returncode):
        # Log everything the command produced and abort
        for line in stderr:
            conn.logger.error(line)
        for line in stdout:
            # yes stdout as err because this is an error
            conn.logger.error(line)
        conn.logger.error('exit code from command was: %s' % returncode)
        raise RuntimeError('could not create mds')

    stdout, stderr, returncode = get_or_create_key(
        'allow rwx', 'allow profile mds')
    if returncode > 0:
        if returncode != errno.EACCES:
            fail(stdout, stderr, returncode)

        # EACCES: retry with the alternative capabilities. This retry used to
        # sit right after the ``raise`` above, where it could never run.
        stdout, stderr, returncode = get_or_create_key(
            'allow *', 'allow rwx')
        if returncode > 0:
            fail(stdout, stderr, returncode)

    conn.remote_module.touch_file(os.path.join(path, 'done'))
    conn.remote_module.touch_file(os.path.join(path, init))

    if init == 'upstart':
        process.run(conn, [
            'initctl',
            'emit',
            'ceph-mds',
            'cluster={cluster}'.format(cluster=cluster),
            'id={name}'.format(name=name),
        ],
                    timeout=7)
    elif init == 'sysvinit':
        process.run(conn, [
            'service',
            'ceph',
            'start',
            'mds.{name}'.format(name=name),
        ],
                    timeout=7)
Exemple #14
0
def create_mds(conn, name, cluster, init):
    """
    Create the data directory and keyring for the MDS ``name`` of ``cluster``
    on the remote end of ``conn`` and start the daemon.

    The keyring is requested with ``ceph auth get-or-create`` using the
    bootstrap-mds keyring. If that command exits with ``errno.EACCES`` the
    request is repeated with an alternative set of capabilities (``osd
    'allow *'``, ``mon 'allow rwx'``) -- presumably for monitors that do not
    accept ``allow profile mds``.

    After the keyring is in place, the ``done`` and ``init`` marker files are
    touched and the daemon is started through ``initctl`` (``init ==
    'upstart'``) or ``service`` (``init == 'sysvinit'``). Any other value of
    ``init`` does not start anything.

    Raises ``RuntimeError`` when the keyring cannot be created.
    """
    path = '/var/lib/ceph/mds/{cluster}-{name}'.format(
        cluster=cluster,
        name=name
        )

    conn.remote_module.safe_mkdir(path)

    bootstrap_keyring = '/var/lib/ceph/bootstrap-mds/{cluster}.keyring'.format(
        cluster=cluster
        )

    keypath = os.path.join(path, 'keyring')

    def get_or_create_key(osd_caps, mon_caps):
        # Request the MDS key with the given capabilities, saved to keypath
        return process.check(
            conn,
            [
                'ceph',
                '--cluster', cluster,
                '--name', 'client.bootstrap-mds',
                '--keyring', bootstrap_keyring,
                'auth', 'get-or-create', 'mds.{name}'.format(name=name),
                'osd', osd_caps,
                'mds', 'allow',
                'mon', mon_caps,
                '-o',
                keypath,
            ]
        )

    def fail(stdout, stderr, returncode):
        # Log everything the command produced and abort
        for line in stderr:
            conn.logger.error(line)
        for line in stdout:
            # yes stdout as err because this is an error
            conn.logger.error(line)
        conn.logger.error('exit code from command was: %s' % returncode)
        raise RuntimeError('could not create mds')

    stdout, stderr, returncode = get_or_create_key(
        'allow rwx',
        'allow profile mds'
    )
    if returncode > 0:
        if returncode != errno.EACCES:
            fail(stdout, stderr, returncode)

        # EACCES: retry with the alternative capabilities. This retry used to
        # sit right after the ``raise`` above, where it could never run.
        stdout, stderr, returncode = get_or_create_key(
            'allow *',
            'allow rwx'
        )
        if returncode > 0:
            fail(stdout, stderr, returncode)

    conn.remote_module.touch_file(os.path.join(path, 'done'))
    conn.remote_module.touch_file(os.path.join(path, init))

    if init == 'upstart':
        process.run(
            conn,
            [
                'initctl',
                'emit',
                'ceph-mds',
                'cluster={cluster}'.format(cluster=cluster),
                'id={name}'.format(name=name),
            ],
            timeout=7
        )
    elif init == 'sysvinit':
        process.run(
            conn,
            [
                'service',
                'ceph',
                'start',
                'mds.{name}'.format(name=name),
            ],
            timeout=7
        )
Exemple #15
0
def purgedata(args):
    """
    Purge ceph data from all the hosts in ``args.host``: the contents of
    ``/var/lib/ceph`` and ``/etc/ceph/`` are removed on each of them.

    As a safety measure every host is checked first for the ``ceph``
    executable. If it is found anywhere, the hosts are reported in the log
    and a ``RuntimeError`` is raised before anything gets removed.
    """
    data_dir = '/var/lib/ceph'
    config_dir = '/etc/ceph/'

    def removal_of(target):
        # Build a fresh ``rm`` command for every invocation
        return ['rm', '-rf', '--one-file-system', '--', target]

    LOG.debug(
        'Purging data from cluster %s hosts %s',
        args.cluster,
        ' '.join(args.host),
    )

    hosts_with_ceph = []
    for node in args.host:
        distro = hosts.get(node, username=args.username)
        found = distro.conn.remote_module.which('ceph')
        if found:
            hosts_with_ceph.append(node)
        distro.conn.exit()

    if hosts_with_ceph:
        LOG.error("ceph is still installed on: %s", hosts_with_ceph)
        raise RuntimeError("refusing to purge data while ceph is still installed")

    for node in args.host:
        distro = hosts.get(node, username=args.username)
        LOG.info(
            'Distro info: %s %s %s',
            distro.name,
            distro.release,
            distro.codename
        )

        node_logger = logging.getLogger(node)
        node_logger.info('purging data on %s' % node)

        # First attempt at removing the data; errors are not a concern yet
        # because they are dealt with in the retry below
        process.check(distro.conn, removal_of(data_dir))

        data_dir_remains = distro.conn.remote_module.path_exists(data_dir)
        if data_dir_remains:
            # A failed removal most likely means that OSDs are still
            # mounted: unmount them and retry, now checking for errors
            node_logger.warning(
                'OSDs may still be mounted, trying to unmount them'
            )
            process.run(
                distro.conn,
                [
                    'find', data_dir,
                    '-mindepth', '1',
                    '-maxdepth', '2',
                    '-type', 'd',
                    '-exec', 'umount', '{}', ';',
                ]
            )
            process.run(distro.conn, removal_of(data_dir))

        process.run(distro.conn, removal_of(config_dir))

        distro.conn.exit()