def rollback_osd(args, osd_id=None):
    """
    When the process of creating or preparing fails, the OSD needs to be
    destroyed so that the ID can be reused. This prevents leaving the ID
    around as "used" on the monitor, which can cause confusion if expecting
    sequential OSD IDs.

    The usage of ``purge-new`` allows this to be done without requiring the
    admin keyring (otherwise needed for destroy and purge commands).

    :param args: parsed command line arguments (unused here; kept for call
        symmetry with other rollback implementations)
    :param osd_id: the OSD ID to roll back; when falsy nothing is done
    """
    if not osd_id:
        # it means that it wasn't generated, so there is nothing to rollback here
        return

    # once here, this is an error condition that needs to be rolled back
    terminal.error('Was unable to complete a new OSD, will rollback changes')
    osd_name = 'osd.%s'
    # authenticate with the bootstrap-osd keyring, not the admin keyring
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    cmd = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        'osd', 'purge-new', osd_name % osd_id,
        '--yes-i-really-mean-it',
    ]
    process.run(cmd)
def dmcrypt_close(mapping):
    """
    Close (tear down) a device mapper mapping previously opened with
    cryptsetup.

    :param mapping: the dm mapping name to remove
    """
    process.run(['cryptsetup', 'remove', mapping])
def clear_tags(self):
    """
    Removes all tags from the Logical Volume.
    """
    # issue one lvchange --deltag call per currently known tag
    for key, value in self.tags.items():
        process.run(
            ['lvchange', '--deltag', "%s=%s" % (key, value), self.lv_path]
        )
def create_vg(devices, name=None, name_prefix=None):
    """
    Create a Volume Group. Command looks like::

        vgcreate --force --yes group_name device

    Once created the volume group is returned as a ``VolumeGroup`` object

    :param devices: A list of devices to create a VG. Optionally, a single
                    device (as a string) can be used.
    :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
    :param name_prefix: Optionally prefix the name of the VG, which will get
                        combined with a UUID string
    """
    # normalize the devices argument into a plain list
    if isinstance(devices, set):
        devices = list(devices)
    if not isinstance(devices, list):
        devices = [devices]

    # a prefix wins over an explicitly passed name; the final fallback is
    # the conventional ceph-<uuid> naming
    if name_prefix:
        name = "%s-%s" % (name_prefix, str(uuid.uuid4()))
    elif name is None:
        name = "ceph-%s" % str(uuid.uuid4())

    process.run(['vgcreate', '--force', '--yes', name] + devices)

    return get_vg(vg_name=name)
def mount_tmpfs(path):
    # mount a tmpfs filesystem at the given path
    command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(command)
def link_journal(journal_device, osd_id):
    # symlink the journal device into the OSD data directory
    journal_path = '/var/lib/ceph/osd/%s-%s/journal' % (
        conf.cluster,
        osd_id
    )
    process.run(['sudo', 'ln', '-s', journal_device, journal_path])
def __exit__(self, exc_type, exc_val, exc_tb):
    # unmount the temporary mount when leaving the context, regardless
    # of whether the body raised
    command = ['sudo', 'umount', '-v', self.path]
    process.run(command)
def rollback_osd(args, osd_id=None):
    """
    When the process of creating or preparing fails, the OSD needs to be
    either purged (ID fully removed) or destroyed (ID persists). This is
    important because otherwise it would leave the ID around, which can cause
    confusion if relying on the automatic (OSD.N + 1) behavior.

    When the OSD id is specified in the command line explicitly (with
    ``--osd-id``), the ID is then preserved with a soft removal (``ceph osd
    destroy``), otherwise it is fully removed with ``purge``.

    :param args: parsed command line arguments; ``args.osd_id`` decides
        between destroy (keep ID) and purge (remove ID)
    :param osd_id: the OSD ID to roll back; when falsy nothing is done
    """
    if not osd_id:
        # it means that it wasn't generated, so there is nothing to rollback here
        return

    # once here, this is an error condition that needs to be rolled back
    terminal.error('Was unable to complete a new OSD, will rollback changes')
    osd_name = 'osd.%s'
    if args.osd_id is None:
        terminal.error('OSD will be fully purged from the cluster, because the ID was generated')
        # the ID wasn't passed in explicitly, so make sure it is fully removed
        process.run([
            'ceph', 'osd', 'purge',
            osd_name % osd_id,
            '--yes-i-really-mean-it'])
    else:
        terminal.error('OSD will be destroyed, keeping the ID because it was provided with --osd-id')
        # the ID was passed explicitly, so allow to re-use by using `destroy`
        process.run([
            'ceph', 'osd', 'destroy',
            osd_name % args.osd_id,
            '--yes-i-really-mean-it'])
def set_context(path, recursive=False):
    """
    Restore the SELinux context of ``path`` to default policy values, but
    only when the ``restorecon`` executable can be located.
    """
    if not which('restorecon').startswith('/'):
        return
    if recursive:
        process.run(['restorecon', '-R', path])
    else:
        process.run(['restorecon', path])
def remove_pv(pv_name):
    """
    Removes a physical volume using a double `-f` to prevent prompts and fully
    remove anything related to LVM. This is tremendously destructive, but so is all other actions
    when zapping a device.

    In the case where multiple PVs are found, it will ignore that fact and
    continue with the removal, specifically in the case of messages like::

        WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2

    These situations can be avoided with custom filtering rules, which this API
    cannot handle while accommodating custom user filters.

    :param pv_name: device path of the physical volume to remove
    """
    # fix: the failure message previously said "vg" even though a physical
    # volume is being removed here
    fail_msg = "Unable to remove pv %s" % pv_name
    process.run(
        [
            'pvremove',
            '-v',  # verbose
            '-f',  # force it
            '-f',  # force it
            pv_name
        ],
        fail_msg=fail_msg,
    )
def osd_mkfs_filestore(osd_id, fsid):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-journal /var/lib/ceph/osd/ceph-0/journal \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')
    journal = os.path.join(path, 'journal')

    # ceph-osd runs as the ceph user, so it must own both the data dir
    # and the journal before mkfs
    system.chown(journal)
    system.chown(path)

    command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        # undocumented flag, sets the `type` file to contain 'filestore'
        '--osd-objectstore', 'filestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
        '--osd-journal', journal,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph',
    ]
    process.run(command)
def activate_filestore(lvs):
    """
    Activate a filestore OSD from its logical volumes: locate the data LV by
    its ``ceph.type`` tag, resolve the journal (LV or partition), optionally
    open dm-crypt mappings, mount the data volume, refresh the journal
    symlink, and enable + start the systemd units for the OSD.

    :param lvs: a queryable collection of logical volumes; must support
        ``get(lv_tags={...})`` returning a single LV object or ``None``
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'data'})
    if not osd_lv:
        raise RuntimeError('Unable to find a data LV for filestore activation')
    # the ceph.encrypted tag is a string flag, '1' means dmcrypt is in use
    is_encrypted = osd_lv.tags.get('ceph.encrypted', '0') == '1'

    osd_id = osd_lv.tags['ceph.osd_id']
    conf.cluster = osd_lv.tags['ceph.cluster_name']
    # it may have a volume with a journal
    osd_journal_lv = lvs.get(lv_tags={'ceph.type': 'journal'})
    # TODO: add sensible error reporting if this is ever the case
    # blow up with a KeyError if this doesn't exist
    osd_fsid = osd_lv.tags['ceph.osd_fsid']
    if not osd_journal_lv:
        # must be a disk partition, by querying blkid by the uuid we are ensuring that the
        # device path is always correct
        journal_uuid = osd_lv.tags['ceph.journal_uuid']
        osd_journal = disk.get_device_from_partuuid(journal_uuid)
    else:
        journal_uuid = osd_journal_lv.lv_uuid
        osd_journal = osd_lv.tags['ceph.journal_device']

    if not osd_journal:
        raise RuntimeError('unable to detect an lv or device journal for OSD %s' % osd_id)

    # this is done here, so that previous checks that ensure path availability
    # and correctness can still be enforced, and report if any issues are found
    if is_encrypted:
        lockbox_secret = osd_lv.tags['ceph.cephx_lockbox_secret']
        # this keyring writing is idempotent
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        # open both the data LV and the journal, then point everything at
        # the resulting /dev/mapper paths instead of the raw devices
        encryption_utils.luks_open(dmcrypt_secret, osd_lv.lv_path, osd_lv.lv_uuid)
        encryption_utils.luks_open(dmcrypt_secret, osd_journal, journal_uuid)

        osd_journal = '/dev/mapper/%s' % journal_uuid
        source = '/dev/mapper/%s' % osd_lv.lv_uuid
    else:
        source = osd_lv.lv_path

    # mount the osd
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.device_is_mounted(source, destination=destination):
        process.run(['mount', '-v', source, destination])

    # always re-do the symlink regardless if it exists, so that the journal
    # device path that may have changed can be mapped correctly every time
    destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id)
    process.run(['ln', '-snf', osd_journal, destination])

    # make sure that the journal has proper permissions
    system.chown(osd_journal)

    # enable the ceph-volume unit for this OSD
    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

    # start the OSD
    systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
def mask_ceph_disk():
    # systemctl allows using a glob like '*' for masking, but there was a bug
    # in that it wouldn't allow this for service templates. This means that
    # masking ceph-disk@* will not work, so we must link the service directly.
    # /etc/systemd takes precedence regardless of the location of the unit
    command = [
        'ln', '-sf', '/dev/null',
        '/etc/systemd/system/[email protected]',
    ]
    process.run(command)
def main(args=None):
    """
    Main entry point for the ``ceph-volume-systemd`` executable. ``args`` are
    optional for easier testing of arguments.

    Expected input is similar to::

        ['/path/to/ceph-volume-systemd', '<osd id>-<osd uuid>-<device type>']
        ['/path/to/ceph-volume-systemd', '<type>-<extra metadata>']

    For example::

        [
            '/usr/bin/ceph-volume-systemd',
            'lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41'
        ]

    The first part of the argument is the only interesting bit, which contains
    the metadata needed to proxy the call to ``ceph-volume`` itself.

    Reusing the example, the proxy call to ``ceph-volume`` would look like::

        ceph-volume lvm trigger 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41

    That means that ``lvm`` is used as the subcommand and it is **expected**
    that a ``trigger`` sub-commmand will be present to make sense of the extra
    piece of the string.
    """
    log.setup(name='ceph-volume-systemd.log', log_path='/var/log/ceph/ceph-volume-systemd.log')
    logger = logging.getLogger('systemd')

    args = args if args is not None else sys.argv
    try:
        suffix = args[-1]
    except IndexError:
        raise RuntimeError('no arguments supplied')
    sub_command = parse_subcommand(suffix)
    extra_data = parse_extra_data(suffix)
    logger.info('raw systemd input received: %s', suffix)
    logger.info('parsed sub-command: %s, extra data: %s', sub_command, extra_data)
    command = ['ceph-volume', sub_command, 'trigger', extra_data]

    # fix: environment variables are always strings; without int() the
    # `tries > 0` comparison and `tries -= 1` raise TypeError on Python 3
    # whenever either variable is set in the environment
    tries = int(os.environ.get('CEPH_VOLUME_SYSTEMD_TRIES', 30))
    interval = int(os.environ.get('CEPH_VOLUME_SYSTEMD_INTERVAL', 5))
    while tries > 0:
        try:
            # don't log any output to the terminal, just rely on stderr/stdout
            # going to logging
            process.run(command, terminal_logging=False)
            logger.info('successfully triggered activation for: %s', extra_data)
            break
        except RuntimeError as error:
            logger.warning(error)
            logger.warning('failed activating OSD, retries left: %s', tries)
            tries -= 1
            time.sleep(interval)
def __enter__(self):
    # create a throwaway directory and mount the device onto it; the
    # mount point is handed back to the ``with`` statement
    self.path = tempfile.mkdtemp()
    command = ['mount', '-v', self.device, self.path]
    process.run(command)
    return self.path
def unmount(path):
    """
    Removes mounts at the given path
    """
    command = ['umount', '-v', path]
    process.run(command)
def wipefs(path):
    """
    Removes the filesystem from an lv or partition.
    """
    process.run(['wipefs', '--all', path])
def mount_tmpfs(path):
    # mount a tmpfs filesystem at the given path, then restore the
    # SELinux context on the fresh mount point
    command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(command)

    # Restore SELinux context
    system.set_context(path)
def __exit__(self, exc_type, exc_val, exc_tb):
    # tear down the mount first; for encrypted devices also close the
    # dm-crypt mapping afterwards
    process.run(['umount', '-v', self.path])
    if not self.encrypted:
        return
    # avoid a circular import from the encryption module
    from ceph_volume.util import encryption
    encryption.dmcrypt_close(self.device)
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command

    :param osd_id: the numeric ID of the OSD being created
    :param fsid: the OSD uuid, passed through as ``--osd-uuid``
    :param keyring: optional cephx key; appended as ``--key`` (and obfuscated
        when the command is logged)
    :param wal: optional path to a bluestore WAL device
    :param db: optional path to a bluestore DB device
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'sudo',
        'ceph-osd',
        '--cluster', conf.cluster,
        # undocumented flag, sets the `type` file to contain 'bluestore'
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        base_command.extend(['--key', keyring])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        # the wal device must also be owned by the ceph user
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        # the db device must also be owned by the ceph user
        system.chown(db)

    command = base_command + supplementary_command

    # obfuscate hides the secret that follows '--key' in logged output
    process.run(command, obfuscate='--key')
def dmcrypt_close(mapping):
    """
    Close a dm-crypt mapping previously opened with cryptsetup. The call is
    skipped entirely when the mapper path no longer exists.

    :param mapping: path of the device mapper mapping to remove
    """
    if os.path.exists(mapping):
        process.run(['cryptsetup', 'remove', mapping])
        return
    logger.debug('device mapper path does not exist %s' % mapping)
    logger.debug('will skip cryptsetup removal')
def chown(path, recursive=True):
    """
    ``chown`` a path to the ceph user (uid and guid fetched at runtime)
    """
    uid, gid = get_ceph_user_ids()
    # operate on the link target, not the symlink itself
    if os.path.islink(path):
        path = os.path.realpath(path)
    if not recursive:
        os.chown(path, uid, gid)
    else:
        process.run(['chown', '-R', 'ceph:ceph', path])
def create_lv(name, group, size=None, **tags): """ Create a Logical Volume in a Volume Group. Command looks like:: lvcreate -L 50G -n gfslv vg0 ``name``, ``group``, and ``size`` are required. Tags are optional and are "translated" to include the prefixes for the Ceph LVM tag API. """ # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations type_path_tag = { 'journal': 'ceph.journal_device', 'data': 'ceph.data_device', 'block': 'ceph.block', 'wal': 'ceph.wal', 'db': 'ceph.db', 'lockbox': 'ceph.lockbox_device', } if size: process.run([ 'sudo', 'lvcreate', '--yes', '-L', '%sG' % size, '-n', name, group ]) # create the lv with all the space available, this is needed because the # system call is different for LVM else: process.run([ 'sudo', 'lvcreate', '--yes', '-l', '100%FREE', '-n', name, group ]) lv = get_lv(lv_name=name, vg_name=group) ceph_tags = {} for k, v in tags.items(): ceph_tags['ceph.%s' % k] = v lv.set_tags(ceph_tags) # when creating a distinct type, the caller doesn't know what the path will # be so this function will set it after creation using the mapping path_tag = type_path_tag[tags['type']] lv.set_tags( {path_tag: lv.lv_path} ) return lv
def create_lv(name, group, size=None, tags=None):
    """
    Create a Logical Volume in a Volume Group. Command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name``, ``group``, are required. If ``size`` is provided it must follow
    lvm's size notation (like 1G, or 20M). Tags are an optional dictionary and is expected to
    conform to the convention of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    :param name: name of the LV to create
    :param group: name of the VG to create the LV in
    :param size: optional LVM size notation string (e.g. '1G')
    :param tags: optional dict of 'ceph.'-prefixed tags to set on the LV
    """
    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    type_path_tag = {
        'journal': 'ceph.journal_device',
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    if size:
        process.run([
            'sudo',
            'lvcreate',
            '--yes',
            '-L',
            '%s' % size,
            '-n', name, group
        ])
    # create the lv with all the space available, this is needed because the
    # system call is different for LVM
    else:
        process.run([
            'sudo',
            'lvcreate',
            '--yes',
            '-l',
            '100%FREE',
            '-n', name, group
        ])

    lv = get_lv(lv_name=name, vg_name=group)
    # fix: ``tags`` defaults to None, and the ``tags.get('ceph.type')``
    # lookup below would raise AttributeError when it was omitted
    if tags is None:
        tags = {}
    lv.set_tags(tags)

    # when creating a distinct type, the caller doesn't know what the path will
    # be so this function will set it after creation using the mapping
    path_tag = type_path_tag.get(tags.get('ceph.type'))
    if path_tag:
        lv.set_tags(
            {path_tag: lv.lv_path}
        )
    return lv
def create_pv(device):
    """
    Create a physical volume from a device, useful when devices need to be
    later mapped to journals.
    """
    command = [
        'pvcreate',
        '-v',      # verbose
        '-f',      # force it
        '--yes',   # answer yes to any prompts
        device,
    ]
    process.run(command)
def write_keyring(osd_id, secret):
    # FIXME this only works for cephx, but there will be other types of secrets
    # later
    osd_keyring = '/var/lib/ceph/osd/%s-%s/keyring' % (conf.cluster, osd_id)
    command = [
        'ceph-authtool', osd_keyring,
        '--create-keyring',
        '--name', 'osd.%s' % str(osd_id),
        '--add-key', secret,
    ]
    process.run(command)
    # the keyring must be readable by the ceph user
    system.chown(osd_keyring)
def mount_osd(device, osd_id):
    """
    Mount ``device`` at the OSD's conventional data directory as xfs, using
    the configured (or default) xfs mount options.

    :param device: the device path to mount
    :param osd_id: the OSD ID, used to compute the mount destination
    """
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    command = ['mount', '-t', 'xfs', '-o']
    flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    # NOTE(review): ``get_list`` appears to return a list (it accepts a
    # ``split`` argument), yet it is appended here as a single element, which
    # would nest a list inside the command — confirm whether process.run
    # tolerates this or the flags should be joined into one string first
    command.append(flags)
    command.append(device)
    command.append(destination)
    process.run(command)
def remove_vg(vg_name):
    """
    Removes a volume group.

    :param vg_name: name of the volume group to remove
    """
    # fix: the original used "...%s".format(vg_name), which leaves the
    # literal '%s' in the message (str.format needs '{}' placeholders);
    # use %-interpolation to actually include the vg name
    fail_msg = "Unable to remove vg %s" % vg_name
    process.run(
        [
            'vgremove',
            '-v',  # verbose
            '-f',  # force it
            vg_name
        ],
        fail_msg=fail_msg,
    )
def remove_pv(pv_name):
    """
    Removes a physical volume.

    :param pv_name: device path of the physical volume to remove
    """
    # fixes: the original used "...%s".format(pv_name), which leaves the
    # literal '%s' in the message (str.format needs '{}' placeholders), and
    # said "vg" even though a physical volume is being removed
    fail_msg = "Unable to remove pv %s" % pv_name
    process.run(
        [
            'pvremove',
            '-v',  # verbose
            '-f',  # force it
            pv_name
        ],
        fail_msg=fail_msg,
    )
def activate_bluestore(lvs):
    """
    Activate a bluestore OSD from its logical volumes: locate the block LV
    by its ``ceph.type`` tag, mount a tmpfs OSD directory, optionally open
    the dm-crypt mapping, prime the OSD dir with ceph-bluestore-tool,
    refresh the block/block.db/block.wal symlinks, and enable + start the
    systemd units for the OSD.

    :param lvs: a queryable collection of logical volumes; must support
        ``get(lv_tags={...})`` returning a single LV object or ``None``
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'block'})
    # the ceph.encrypted tag is a string flag, '1' means dmcrypt is in use
    is_encrypted = osd_lv.tags.get('ceph.encrypted', '0') == '1'
    dmcrypt_secret = None
    osd_id = osd_lv.tags['ceph.osd_id']
    conf.cluster = osd_lv.tags['ceph.cluster_name']
    osd_fsid = osd_lv.tags['ceph.osd_fsid']

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # XXX This needs to be removed once ceph-bluestore-tool can deal with
    # symlinks that exist in the osd dir
    for link_name in ['block', 'block.db', 'block.wal']:
        link_path = os.path.join(osd_path, link_name)
        if os.path.exists(link_path):
            os.unlink(os.path.join(osd_path, link_name))
    # encryption is handled here, before priming the OSD dir
    if is_encrypted:
        osd_lv_path = '/dev/mapper/%s' % osd_lv.lv_uuid
        lockbox_secret = osd_lv.tags['ceph.cephx_lockbox_secret']
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        encryption_utils.luks_open(dmcrypt_secret, osd_lv.lv_path, osd_lv.lv_uuid)
    else:
        osd_lv_path = osd_lv.lv_path

    # the dmcrypt secret is forwarded so encrypted db/wal devices can be
    # opened as well
    db_device_path = get_osd_device_path(osd_lv, lvs, 'db', dmcrypt_secret=dmcrypt_secret)
    wal_device_path = get_osd_device_path(osd_lv, lvs, 'wal', dmcrypt_secret=dmcrypt_secret)

    # Once symlinks are removed, the osd dir can be 'primed again.
    process.run([
        'ceph-bluestore-tool', '--cluster=%s' % conf.cluster,
        'prime-osd-dir', '--dev', osd_lv_path,
        '--path', osd_path])
    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    process.run(['ln', '-snf', osd_lv_path, os.path.join(osd_path, 'block')])
    system.chown(os.path.join(osd_path, 'block'))
    system.chown(osd_path)
    if db_device_path:
        destination = os.path.join(osd_path, 'block.db')
        process.run(['ln', '-snf', db_device_path, destination])
        system.chown(db_device_path)
    if wal_device_path:
        destination = os.path.join(osd_path, 'block.wal')
        process.run(['ln', '-snf', wal_device_path, destination])
        system.chown(wal_device_path)
    # enable the ceph-volume unit for this OSD
    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
    # start the OSD
    systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
def reduce_vg(vg, devices):
    """
    Reduce a Volume Group. Command looks like::

        vgreduce --force --yes group_name [device, ...]

    :param vg: A VolumeGroup object
    :param devices: A list of devices to remove from the VG. Optionally, a
                    single device (as a string) can be used.
    """
    if not isinstance(devices, list):
        devices = [devices]

    command = ['vgreduce', '--force', '--yes', vg.name] + devices
    process.run(command)

    return get_single_vg(filter={'vg_name': vg.name})
def get_monmap(osd_id):
    """
    Before creating the OSD files, a monmap needs to be retrieved so that it
    can be used to tell the monitor(s) about the new OSD. A call will look
    like::

        ceph --cluster ceph --name client.bootstrap-osd \
             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    monmap_destination = os.path.join(path, 'activate.monmap')

    command = [
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        'mon', 'getmap', '-o', monmap_destination,
    ]
    process.run(command)
def create_vg(devices, name=None):
    """
    Create a Volume Group. Command looks like::

        vgcreate --force --yes group_name device

    Once created the volume group is returned as a ``VolumeGroup`` object

    :param devices: A list of devices to create a VG. Optionally, a single
                    device (as a string) can be used.
    :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
    """
    if not isinstance(devices, list):
        devices = [devices]
    # default to a unique, recognizable name
    if name is None:
        name = "ceph-%s" % str(uuid.uuid4())

    command = ['vgcreate', '--force', '--yes', name] + devices
    process.run(command)

    return get_vg(vg_name=name)
def format_device(device):
    # only supports xfs

    # get the mkfs options if any for xfs,
    # fallback to the default options defined in constants.mkfs
    mkfs_flags = conf.ceph.get_list(
        'osd',
        'osd_mkfs_options_xfs',
        default=constants.mkfs.get('xfs'),
        split=' ',
    )

    # always force
    if '-f' not in mkfs_flags:
        mkfs_flags.insert(0, '-f')

    process.run(['mkfs', '-t', 'xfs'] + mkfs_flags + [device])
def mount_osd(device, osd_id, **kw):
    # vdo-backed devices need 'discard' added to the mount flags
    extras = ['discard'] if kw.get('is_vdo', '0') == '1' else []

    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    flags = conf.ceph.get_list(
        'osd',
        'osd_mount_options_xfs',
        default=constants.mount.get('xfs'),
        split=' ',
    )
    command = [
        'mount', '-t', 'xfs', '-o',
        _normalize_mount_flags(flags, extras=extras),
        device,
        destination,
    ]
    process.run(command)

    # Restore SELinux context
    system.set_context(destination)
def create_lv(name, group, size=None, **tags): """ Create a Logical Volume in a Volume Group. Command looks like:: lvcreate -L 50G -n gfslv vg0 ``name``, ``group``, and ``size`` are required. Tags are optional and are "translated" to include the prefixes for the Ceph LVM tag API. """ # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations type_path_tag = { 'journal': 'ceph.journal_device', 'data': 'ceph.data_device', 'block': 'ceph.block', 'wal': 'ceph.wal', 'db': 'ceph.db', 'lockbox': 'ceph.lockbox_device', } if size: process.run([ 'sudo', 'lvcreate', '--yes', '-L', '%sG' % size, '-n', name, group ]) # create the lv with all the space available, this is needed because the # system call is different for LVM else: process.run( ['sudo', 'lvcreate', '--yes', '-l', '100%FREE', '-n', name, group]) lv = get_lv(lv_name=name, vg_name=group) ceph_tags = {} for k, v in tags.items(): ceph_tags['ceph.%s' % k] = v lv.set_tags(ceph_tags) # when creating a distinct type, the caller doesn't know what the path will # be so this function will set it after creation using the mapping path_tag = type_path_tag[tags['type']] lv.set_tags({path_tag: lv.lv_path}) return lv
def activate_filestore(lvs):
    """
    Activate a filestore OSD from its logical volumes: locate the data LV by
    its ``ceph.type`` tag, resolve the journal (LV or partition), mount the
    data volume, refresh the journal symlink, and enable + start the systemd
    units for the OSD.

    :param lvs: a queryable collection of logical volumes; must support
        ``get(lv_tags={...})`` returning a single LV object or ``None``
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'data'})
    if not osd_lv:
        raise RuntimeError('Unable to find a data LV for filestore activation')
    osd_id = osd_lv.tags['ceph.osd_id']
    conf.cluster = osd_lv.tags['ceph.cluster_name']
    # it may have a volume with a journal
    osd_journal_lv = lvs.get(lv_tags={'ceph.type': 'journal'})
    # TODO: add sensible error reporting if this is ever the case
    # blow up with a KeyError if this doesn't exist
    osd_fsid = osd_lv.tags['ceph.osd_fsid']
    if not osd_journal_lv:
        # must be a disk partition, by querying blkid by the uuid we are ensuring that the
        # device path is always correct
        osd_journal = disk.get_device_from_partuuid(osd_lv.tags['ceph.journal_uuid'])
    else:
        osd_journal = osd_lv.tags['ceph.journal_device']

    if not osd_journal:
        raise RuntimeError('unable to detect an lv or device journal for OSD %s' % osd_id)

    # mount the osd
    source = osd_lv.lv_path
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.device_is_mounted(source, destination=destination):
        process.run(['sudo', 'mount', '-v', source, destination])

    # always re-do the symlink regardless if it exists, so that the journal
    # device path that may have changed can be mapped correctly every time
    destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id)
    process.run(['sudo', 'ln', '-snf', osd_journal, destination])

    # make sure that the journal has proper permissions
    system.chown(osd_journal)

    # enable the ceph-volume unit for this OSD
    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

    # start the OSD
    systemctl.start_osd(osd_id)
def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
    """
    Create a keyring file with the ``ceph-authtool`` utility. Constructs the
    path over well-known conventions for the OSD, and allows any other custom
    ``name`` to be set.

    :param osd_id: The ID for the OSD to be used
    :param secret: The key to be added as (as a string)
    :param name: Defaults to 'osd.{ID}' but can be used to add other client
                 names, specifically for 'lockbox' type of keys
    :param keyring_name: Alternative keyring name, for supporting other
                         types of keys like for lockbox
    """
    osd_keyring = '/var/lib/ceph/osd/%s-%s/%s' % (
        conf.cluster, osd_id, keyring_name
    )
    name = name or 'osd.%s' % str(osd_id)
    command = [
        'ceph-authtool', osd_keyring,
        '--create-keyring',
        '--name', name,
        '--add-key', secret,
    ]
    process.run(command)
    # the keyring must be readable by the ceph user
    system.chown(osd_keyring)
def create_lv(name, group, size=None, tags=None):
    """
    Create a Logical Volume in a Volume Group. Command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name``, ``group``, are required. If ``size`` is provided it must follow
    lvm's size notation (like 1G, or 20M). Tags are an optional dictionary and is expected to
    conform to the convention of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    :param name: name of the LV to create
    :param group: name of the VG to create the LV in
    :param size: optional LVM size notation string (e.g. '1G')
    :param tags: optional dict of 'ceph.'-prefixed tags to set on the LV
    """
    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    type_path_tag = {
        'journal': 'ceph.journal_device',
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    if size:
        process.run(
            ['lvcreate', '--yes', '-L', '%s' % size, '-n', name, group])
    # create the lv with all the space available, this is needed because the
    # system call is different for LVM
    else:
        process.run(['lvcreate', '--yes', '-l', '100%FREE', '-n', name, group])

    lv = get_lv(lv_name=name, vg_name=group)
    # fix: ``tags`` defaults to None, and the ``tags.get('ceph.type')``
    # lookup below would raise AttributeError when it was omitted
    if tags is None:
        tags = {}
    lv.set_tags(tags)

    # when creating a distinct type, the caller doesn't know what the path will
    # be so this function will set it after creation using the mapping
    path_tag = type_path_tag.get(tags.get('ceph.type'))
    if path_tag:
        lv.set_tags({path_tag: lv.lv_path})
    return lv
def activate_bluestore(lvs):
    """
    Activate a bluestore OSD from its logical volumes: locate the block LV
    by its ``ceph.type`` tag, mount a tmpfs OSD directory, prime the OSD dir
    with ceph-bluestore-tool, refresh the block/block.db/block.wal symlinks,
    and enable + start the systemd units for the OSD.

    :param lvs: a queryable collection of logical volumes; must support
        ``get(lv_tags={...})`` returning a single LV object or ``None``
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'block'})
    osd_id = osd_lv.tags['ceph.osd_id']
    conf.cluster = osd_lv.tags['ceph.cluster_name']
    osd_fsid = osd_lv.tags['ceph.osd_fsid']
    db_device_path = get_osd_device_path(osd_lv, lvs, 'db')
    wal_device_path = get_osd_device_path(osd_lv, lvs, 'wal')

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # XXX This needs to be removed once ceph-bluestore-tool can deal with
    # symlinks that exist in the osd dir
    for link_name in ['block', 'block.db', 'block.wal']:
        link_path = os.path.join(osd_path, link_name)
        if os.path.exists(link_path):
            os.unlink(os.path.join(osd_path, link_name))
    # Once symlinks are removed, the osd dir can be 'primed again.
    process.run([
        'ceph-bluestore-tool', '--cluster=%s' % conf.cluster,
        'prime-osd-dir', '--dev', osd_lv.lv_path,
        '--path', osd_path
    ])
    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    process.run(
        ['ln', '-snf', osd_lv.lv_path, os.path.join(osd_path, 'block')])
    system.chown(os.path.join(osd_path, 'block'))
    system.chown(osd_path)
    if db_device_path:
        destination = os.path.join(osd_path, 'block.db')
        process.run(['ln', '-snf', db_device_path, destination])
        system.chown(db_device_path)
    if wal_device_path:
        destination = os.path.join(osd_path, 'block.wal')
        process.run(['ln', '-snf', wal_device_path, destination])
        system.chown(wal_device_path)
    # enable the ceph-volume unit for this OSD
    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
    # start the OSD
    systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
def activate_filestore(lvs):
    """
    Activate a filestore OSD from its logical volumes: locate the data LV by
    its ``ceph.type`` tag, resolve the journal (LV or tagged device), mount
    the data volume, ensure the journal symlink, and enable + start the
    systemd units for the OSD.

    :param lvs: a queryable collection of logical volumes; must support
        ``get(lv_tags={...})`` returning a single LV object or ``None``
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'data'})
    osd_id = osd_lv.tags['ceph.osd_id']
    # it may have a volume with a journal
    osd_journal_lv = lvs.get(lv_tags={'ceph.type': 'journal'})
    # TODO: add sensible error reporting if this is ever the case
    # blow up with a KeyError if this doesn't exist
    osd_fsid = osd_lv.tags['ceph.osd_fsid']
    if not osd_journal_lv:
        osd_journal = osd_lv.tags.get('ceph.journal_device')
    else:
        # fix: the original read ``osd_journal.lv_path`` which referenced
        # the (still unbound) name being assigned — a guaranteed NameError;
        # the journal path comes from the journal LV found above
        osd_journal = osd_journal_lv.lv_path

    if not osd_journal:
        raise RuntimeError(
            'unable to detect an lv or device journal for OSD %s' % osd_id)

    # mount the osd
    source = osd_lv.lv_path
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.is_mounted(source, destination=destination):
        process.run(['sudo', 'mount', '-v', source, destination])

    # ensure that the symlink for the journal is there
    # NOTE(review): this checks the journal *source* path, not the
    # destination symlink — confirm the intent before changing it
    if not os.path.exists(osd_journal):
        source = osd_journal
        destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id)
        process.run(['sudo', 'ln', '-s', source, destination])

    # make sure that the journal has proper permissions
    system.chown(osd_journal)

    # enable the ceph-volume unit for this OSD
    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

    # start the OSD
    systemctl.start_osd(osd_id)
def extend_vg(vg, devices):
    """
    Extend a Volume Group. Command looks like::

        vgextend --force --yes group_name [device, ...]

    Once created the volume group is extended and returned as a
    ``VolumeGroup`` object

    :param vg: A VolumeGroup object
    :param devices: A list of devices to extend the VG. Optionally, a single
                    device (as a string) can be used.
    """
    if not isinstance(devices, list):
        devices = [devices]

    command = ['vgextend', '--force', '--yes', vg.name] + devices
    process.run(command)

    return get_first_vg(filters={'vg_name': vg.name})
def set_context(path, recursive=False):
    """
    Calls ``restorecon`` to set the proper context on SELinux systems. Only if
    the ``restorecon`` executable is found anywhere in the path it will get
    called.

    If the ``CEPH_VOLUME_SKIP_RESTORECON`` environment variable is set to
    any of: "1", "true", "yes" the call will be skipped as well.

    Finally, if SELinux is not enabled, or not available in the system,
    ``restorecon`` will not be called. This is checked by calling out to the
    ``selinuxenabled`` executable. If that tool is not installed or returns
    a non-zero exit status then no further action is taken and this function
    will return.
    """
    # explicit opt-out via the environment wins over everything else
    skip = os.environ.get('CEPH_VOLUME_SKIP_RESTORECON', '')
    if skip.lower() in ['1', 'true', 'yes']:
        logger.info(
            'CEPH_VOLUME_SKIP_RESTORECON environ is set, will not call restorecon'
        )
        return

    try:
        stdout, stderr, code = process.call(['selinuxenabled'],
                                            verbose_on_failure=False)
    except FileNotFoundError:
        logger.info('No SELinux found, skipping call to restorecon')
        return

    if code != 0:
        logger.info('SELinux is not enabled, will not call restorecon')
        return

    # restore selinux context to default policy values
    if not which('restorecon').startswith('/'):
        return
    if recursive:
        process.run(['restorecon', '-R', path])
    else:
        process.run(['restorecon', path])
def activate_bluestore(lvs):
    """
    Activate a bluestore OSD from its logical volumes: locate the block LV
    by its ``ceph.type`` tag, mount a tmpfs OSD directory, prime the OSD dir
    with ceph-bluestore-tool, refresh the block/block.db/block.wal symlinks,
    and enable + start the systemd units for the OSD.

    :param lvs: a queryable collection of logical volumes; must support
        ``get(lv_tags={...})`` returning a single LV object or ``None``
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'block'})
    osd_id = osd_lv.tags['ceph.osd_id']
    osd_fsid = osd_lv.tags['ceph.osd_fsid']
    db_device_path = get_osd_device_path(osd_lv, lvs, 'db')
    wal_device_path = get_osd_device_path(osd_lv, lvs, 'wal')

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # if the osd dir was not mounted via tmpfs, it means that the files are
    # gone, so it needs to be 'primed' again. The command would otherwise
    # fail if the directory was already populated
    process.run([
        'sudo', 'ceph-bluestore-tool', 'prime-osd-dir', '--dev',
        osd_lv.lv_path, '--path', osd_path
    ])
    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    process.run([
        'sudo', 'ln', '-snf', osd_lv.lv_path,
        os.path.join(osd_path, 'block')
    ])
    system.chown(os.path.join(osd_path, 'block'))
    system.chown(osd_path)
    if db_device_path:
        destination = os.path.join(osd_path, 'block.db')
        process.run(['sudo', 'ln', '-snf', db_device_path, destination])
        system.chown(db_device_path)
    if wal_device_path:
        destination = os.path.join(osd_path, 'block.wal')
        process.run(['sudo', 'ln', '-snf', wal_device_path, destination])
        system.chown(wal_device_path)
    # enable the ceph-volume unit for this OSD
    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
    # start the OSD
    systemctl.start_osd(osd_id)
def wipefs(path):
    """
    Removes the filesystem from an lv or partition.
    """
    command = ['wipefs', '--all', path]
    process.run(command)
def __exit__(self, exc_type, exc_val, exc_tb):
    """Unmount the mount point held in ``self.path`` when leaving the context."""
    command = ['sudo', 'umount', '-v', self.path]
    process.run(command)
def create_lv(name_prefix,
              uuid,
              vg=None,
              device=None,
              slots=None,
              extents=None,
              size=None,
              tags=None):
    """
    Create a Logical Volume in a Volume Group. Command looks like::

        lvcreate -L 50G -n gfslv vg0

    ``name_prefix`` is required. If ``size`` is provided it is expected to be
    a byte count. Tags are an optional dictionary and are expected to conform
    to the convention of prefixing them with "ceph." like::

        {"ceph.block_device": "/dev/ceph/osd-1"}

    :param name_prefix: name prefix for the LV, typically something like
                        ceph-osd-block
    :param uuid: UUID to ensure uniqueness; is combined with name_prefix to
                 form the LV name
    :param vg: optional, pass an existing VG to create LV
    :param device: optional, device to use. Either device or vg must be passed
    :param slots: optional, number of slots to divide vg up, LV will occupy
                  one slot if enough space is available
    :param extents: optional, how many lvm extents to use, supersedes slots
    :param size: optional, target LV size in bytes, supersedes extents,
                 resulting LV might be smaller depending on extent size of
                 the underlying VG
    :param tags: optional, a dict of lvm tags to set on the LV
    :raises RuntimeError: when neither ``vg`` nor ``device`` is given, or
                          when a usable volume group cannot be acquired
    """
    name = '{}-{}'.format(name_prefix, uuid)
    if not vg:
        if not device:
            raise RuntimeError("Must either specify vg or device, none given")
        # check if a vgs starting with ceph already exists
        vgs = get_device_vgs(device, 'ceph')
        if vgs:
            vg = vgs[0]
        else:
            # create one if not
            vg = create_vg(device, name_prefix='ceph')
    if not vg:
        # an `assert vg` here would be stripped when running under
        # `python -O`; validate explicitly, matching the RuntimeError
        # style already used in this function
        raise RuntimeError('Unable to acquire a volume group to create LV %s' % name)

    if size:
        extents = vg.bytes_to_extents(size)
        logger.debug('size was passed: {} -> {}'.format(size, extents))
    elif slots and not extents:
        extents = vg.slots_to_extents(slots)
        logger.debug('slots was passed: {} -> {}'.format(slots, extents))

    if extents:
        command = [
            'lvcreate',
            '--yes',
            '-l',
            '{}'.format(extents),
            '-n', name, vg.vg_name
        ]
    # create the lv with all the space available, this is needed because the
    # system call is different for LVM
    else:
        command = [
            'lvcreate',
            '--yes',
            '-l',
            '100%FREE',
            '-n', name, vg.vg_name
        ]
    process.run(command)

    lv = get_lv(lv_name=name, vg_name=vg.vg_name)

    if tags is None:
        tags = {
            "ceph.osd_id": "null",
            "ceph.type": "null",
            "ceph.cluster_fsid": "null",
            "ceph.osd_fsid": "null",
        }
    # when creating a distinct type, the caller doesn't know what the path
    # will be so this function will set it after creation using the mapping
    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
    type_path_tag = {
        'journal': 'ceph.journal_device',
        'data': 'ceph.data_device',
        'block': 'ceph.block_device',
        'wal': 'ceph.wal_device',
        'db': 'ceph.db_device',
        'lockbox': 'ceph.lockbox_device',  # XXX might not ever need this lockbox sorcery
    }
    path_tag = type_path_tag.get(tags.get('ceph.type'))
    if path_tag:
        tags.update({path_tag: lv.lv_path})

    lv.set_tags(tags)

    return lv
def start(unit):
    """Start the given systemd unit."""
    command = ['systemctl', 'start', unit]
    process.run(command)
def __enter__(self):
    """Create a scratch directory, mount ``self.device`` on it, return its path."""
    self.path = tempfile.mkdtemp()
    command = ['mount', '-v', self.device, self.path]
    process.run(command)
    return self.path
def mount_tmpfs(path):
    """Mount a tmpfs filesystem at ``path`` and restore its SELinux context."""
    command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(command)
    # Restore SELinux context
    system.set_context(path)
def activate_filestore(lvs, no_systemd=False):
    """
    Activate a filestore OSD from its logical volumes: locate the data LV
    and its journal (LV or partition), decrypt if needed, mount the data
    device, re-create the journal symlink, and (unless ``no_systemd``)
    enable and start the systemd units for the OSD.

    :param lvs: a Volumes-like collection supporting ``get(lv_tags=...)``
    :param no_systemd: when True, skip enabling/starting systemd units
    :raises RuntimeError: when no data LV or no journal device can be found
    :raises KeyError: when required ``ceph.*`` tags are missing on the LV
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'data'})
    if not osd_lv:
        raise RuntimeError('Unable to find a data LV for filestore activation')
    is_encrypted = osd_lv.tags.get('ceph.encrypted', '0') == '1'
    # NOTE(review): unlike is_encrypted, this keeps the raw tag string and is
    # forwarded as-is to mount_osd below
    is_vdo = osd_lv.tags.get('ceph.vdo', '0')

    osd_id = osd_lv.tags['ceph.osd_id']
    conf.cluster = osd_lv.tags['ceph.cluster_name']
    # it may have a volume with a journal
    osd_journal_lv = lvs.get(lv_tags={'ceph.type': 'journal'})
    # TODO: add sensible error reporting if this is ever the case
    # blow up with a KeyError if this doesn't exist
    osd_fsid = osd_lv.tags['ceph.osd_fsid']
    if not osd_journal_lv:
        # must be a disk partition, by querying blkid by the uuid we are
        # ensuring that the device path is always correct
        journal_uuid = osd_lv.tags['ceph.journal_uuid']
        osd_journal = disk.get_device_from_partuuid(journal_uuid)
    else:
        journal_uuid = osd_journal_lv.lv_uuid
        osd_journal = osd_lv.tags['ceph.journal_device']

    if not osd_journal:
        raise RuntimeError(
            'unable to detect an lv or device journal for OSD %s' % osd_id)

    # this is done here, so that previous checks that ensure path availability
    # and correctness can still be enforced, and report if any issues are found
    if is_encrypted:
        lockbox_secret = osd_lv.tags['ceph.cephx_lockbox_secret']
        # this keyring writing is idempotent
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        # open both the data LV and the journal; the decrypted devices show
        # up under /dev/mapper keyed by their uuids
        encryption_utils.luks_open(dmcrypt_secret, osd_lv.lv_path, osd_lv.lv_uuid)
        encryption_utils.luks_open(dmcrypt_secret, osd_journal, journal_uuid)

        osd_journal = '/dev/mapper/%s' % journal_uuid
        source = '/dev/mapper/%s' % osd_lv.lv_uuid
    else:
        source = osd_lv.lv_path

    # mount the osd
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.device_is_mounted(source, destination=destination):
        prepare_utils.mount_osd(source, osd_id, is_vdo=is_vdo)

    # always re-do the symlink regardless if it exists, so that the journal
    # device path that may have changed can be mapped correctly every time
    destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id)
    process.run(['ln', '-snf', osd_journal, destination])

    # make sure that the journal has proper permissions
    system.chown(osd_journal)

    if no_systemd is False:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

        # enable the OSD
        systemctl.enable_osd(osd_id)

        # start the OSD
        systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
def activate_bluestore(lvs, no_systemd=False):
    """
    Activate a bluestore OSD from its logical volumes: mount the osd dir on
    tmpfs if needed, remove stale block symlinks, decrypt the block LV if
    needed, prime the osd dir with ceph-bluestore-tool, re-create the block,
    block.db and block.wal symlinks, fix ownership, and (unless
    ``no_systemd``) enable and start the systemd units for the OSD.

    :param lvs: a Volumes-like collection supporting ``get(lv_tags=...)``
    :param no_systemd: when True, skip enabling/starting systemd units
    :raises RuntimeError: when no block LV can be found
    :raises KeyError: when required ``ceph.*`` tags are missing on the LV
    """
    # find the osd
    osd_lv = lvs.get(lv_tags={'ceph.type': 'block'})
    if not osd_lv:
        raise RuntimeError('could not find a bluestore OSD to activate')

    is_encrypted = osd_lv.tags.get('ceph.encrypted', '0') == '1'
    dmcrypt_secret = None
    osd_id = osd_lv.tags['ceph.osd_id']
    conf.cluster = osd_lv.tags['ceph.cluster_name']
    osd_fsid = osd_lv.tags['ceph.osd_fsid']

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=True)

    # XXX This needs to be removed once ceph-bluestore-tool can deal with
    # symlinks that exist in the osd dir
    for link_name in ['block', 'block.db', 'block.wal']:
        link_path = os.path.join(osd_path, link_name)
        if os.path.exists(link_path):
            os.unlink(os.path.join(osd_path, link_name))
    # encryption is handled here, before priming the OSD dir
    if is_encrypted:
        osd_lv_path = '/dev/mapper/%s' % osd_lv.lv_uuid
        lockbox_secret = osd_lv.tags['ceph.cephx_lockbox_secret']
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        encryption_utils.luks_open(dmcrypt_secret, osd_lv.lv_path, osd_lv.lv_uuid)
    else:
        osd_lv_path = osd_lv.lv_path

    # db/wal devices are resolved after decryption, so the dmcrypt secret can
    # be reused to open them as well
    db_device_path = get_osd_device_path(osd_lv, lvs, 'db',
                                         dmcrypt_secret=dmcrypt_secret)
    wal_device_path = get_osd_device_path(osd_lv, lvs, 'wal',
                                          dmcrypt_secret=dmcrypt_secret)

    # Once symlinks are removed, the osd dir can be 'primed' again.
    process.run([
        'ceph-bluestore-tool', '--cluster=%s' % conf.cluster,
        'prime-osd-dir', '--dev', osd_lv_path,
        '--path', osd_path, '--no-mon-config'])
    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    process.run(['ln', '-snf', osd_lv_path, os.path.join(osd_path, 'block')])
    system.chown(os.path.join(osd_path, 'block'))
    system.chown(osd_path)
    if db_device_path:
        destination = os.path.join(osd_path, 'block.db')
        process.run(['ln', '-snf', db_device_path, destination])
        system.chown(db_device_path)
        system.chown(destination)
    if wal_device_path:
        destination = os.path.join(osd_path, 'block.wal')
        process.run(['ln', '-snf', wal_device_path, destination])
        system.chown(wal_device_path)
        system.chown(destination)

    if no_systemd is False:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

        # enable the OSD
        systemctl.enable_osd(osd_id)

        # start the OSD
        systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
def mask(unit):
    """Mask the given systemd unit so it cannot be started."""
    command = ['systemctl', 'mask', unit]
    process.run(command)
def disable(unit):
    """Disable the given systemd unit."""
    command = ['systemctl', 'disable', unit]
    process.run(command)
def enable(unit, runtime=False):
    """
    Enable the given systemd unit.

    :param runtime: when True, pass ``--runtime`` so the unit is only
                    enabled until the next reboot
    """
    command = ['systemctl', 'enable']
    if runtime:
        command.append('--runtime')
    command.append(unit)
    process.run(command)
def stop(unit):
    """Stop the given systemd unit."""
    command = ['systemctl', 'stop', unit]
    process.run(command)
def activate(self, args):
    """
    Activate an OSD described by a ceph-disk style JSON config file:
    resolve the data/journal/block devices by uuid, decrypt if needed,
    mount the data device, re-create the device symlinks inside the osd
    dir, and enable the systemd units.

    :param args: parsed CLI args; uses ``json_config``, ``osd_id`` and
                 ``osd_fsid``
    :raises RuntimeError: when the data uuid is missing from the metadata,
                          or when the data device cannot be found
    """
    with open(args.json_config, 'r') as fp:
        osd_metadata = json.load(fp)

    # Make sure that required devices are configured
    self.validate_devices(osd_metadata)

    # metadata values win; CLI args are only a fallback
    osd_id = osd_metadata.get('whoami', args.osd_id)
    osd_fsid = osd_metadata.get('fsid', args.osd_fsid)
    data_uuid = osd_metadata.get('data', {}).get('uuid')
    conf.cluster = osd_metadata.get('cluster_name', 'ceph')
    if not data_uuid:
        raise RuntimeError(
            'Unable to activate OSD %s - no "uuid" key found for data' % args.osd_id
        )

    # Encryption detection, and capturing of the keys to decrypt
    self.is_encrypted = osd_metadata.get('encrypted', False)
    self.encryption_type = osd_metadata.get('encryption_type')
    if self.is_encrypted:
        lockbox_secret = osd_metadata.get('lockbox.keyring')
        # write the keyring always so that we can unlock
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        # Store the secret around so that the decrypt method can reuse
        raw_dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        # Note how both these calls need b64decode. For some reason, the
        # way ceph-disk creates these keys, it stores them in the monitor
        # *undecoded*, requiring this decode call again. The lvm side of
        # encryption doesn't need it, so we are assuming here that anything
        # that `simple` scans, will come from ceph-disk and will need this
        # extra decode call here
        self.dmcrypt_secret = base64.b64decode(raw_dmcrypt_secret)

    cluster_name = osd_metadata.get('cluster_name', 'ceph')
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (cluster_name, osd_id)

    # XXX there is no support for LVM here
    data_device = self.get_device(data_uuid)

    if not data_device:
        raise RuntimeError("osd fsid {} doesn't exist, this file will "
                           "be skipped, consider cleaning legacy "
                           "json file {}".format(osd_metadata['fsid'],
                                                 args.json_config))

    # any of these may be absent from the metadata; get_device is expected
    # to cope with a None uuid in that case
    journal_device = self.get_device(
        osd_metadata.get('journal', {}).get('uuid'))
    block_device = self.get_device(
        osd_metadata.get('block', {}).get('uuid'))
    block_db_device = self.get_device(
        osd_metadata.get('block.db', {}).get('uuid'))
    block_wal_device = self.get_device(
        osd_metadata.get('block.wal', {}).get('uuid'))

    if not system.device_is_mounted(data_device, destination=osd_dir):
        if osd_metadata.get('type') == 'filestore':
            prepare_utils.mount_osd(data_device, osd_id)
        else:
            process.run(['mount', '-v', data_device, osd_dir])

    device_map = {
        'journal': journal_device,
        'block': block_device,
        'block.db': block_db_device,
        'block.wal': block_wal_device
    }

    for name, device in device_map.items():
        if not device:
            continue
        # always re-do the symlink regardless if it exists, so that the journal
        # device path that may have changed can be mapped correctly every time
        destination = os.path.join(osd_dir, name)
        process.run(['ln', '-snf', device, destination])

        # make sure that the journal has proper permissions
        system.chown(device)

    self.enable_systemd_units(osd_id, osd_fsid)

    terminal.success('Successfully activated OSD %s with FSID %s' % (osd_id, osd_fsid))
def test_log_descriptors(self):
    """Run a trivial command through process.run (exercising its descriptor
    logging path — see the test name)."""
    command = ['ls', '-l']
    process.run(command)
def mount_tmpfs(path):
    """Mount a tmpfs filesystem at the given path."""
    command = ['mount', '-t', 'tmpfs', 'tmpfs', path]
    process.run(command)
def __exit__(self, exc_type, exc_val, exc_tb):
    """Unmount ``self.path`` and, for encrypted devices, close the dm-crypt
    mapping for ``self.device``."""
    process.run(['umount', '-v', self.path])
    if not self.encrypted:
        return
    # avoid a circular import from the encryption module
    from ceph_volume.util import encryption
    encryption.dmcrypt_close(self.device)