def scan_directory(self, path):
    """
    Collect the metadata of an OSD from the files found in its directory.

    Every entry of ``path`` is inspected: symlinks are handed over to
    ``self.scan_device``, directories and binary files are skipped, and the
    contents of regular files are stored (as ``int`` when they parse as one).
    Entries ending in ``_dmcrypt`` are stored under their name without that
    suffix. Finally the device mounted at ``path`` is looked up and stored
    under the ``data`` key.

    :param path: Directory of the OSD, it must contain a ``keyring`` file
    :returns: A dictionary with the metadata gathered for the OSD
    :raises RuntimeError: If ``keyring`` is not present in ``path`` or if no
                          device can be found mounted at ``path``
    """
    osd_metadata = {'cluster_name': conf.cluster}
    directory_files = os.listdir(path)
    if 'keyring' not in directory_files:
        raise RuntimeError(
            'OSD files not found, required "keyring" file is not present at: %s' % path
        )
    dmcrypt_suffix = '_dmcrypt'
    for file_ in directory_files:
        file_path = os.path.join(path, file_)
        file_json_key = file_
        if file_.endswith(dmcrypt_suffix):
            # slice the suffix off: ``str.rstrip`` removes any trailing
            # character that is part of its argument, not the literal suffix,
            # so it can eat into the name itself
            file_json_key = file_[:-len(dmcrypt_suffix)]
            logger.info('reading file {}, stripping _dmcrypt suffix'.format(file_))
        if os.path.islink(file_path):
            if os.path.exists(file_path):
                osd_metadata[file_json_key] = self.scan_device(file_path)
            else:
                msg = 'broken symlink found %s -> %s' % (
                    file_path, os.path.realpath(file_path))
                terminal.warning(msg)
                logger.warning(msg)

        if os.path.isdir(file_path):
            continue

        # the check for binary needs to go before the file, to avoid
        # capturing data from binary files but still be able to capture
        # contents from actual files later
        try:
            if system.is_binary(file_path):
                logger.info('skipping binary file: %s' % file_path)
                continue
        except IOError:
            logger.exception('skipping due to IOError on file: %s' % file_path)
            continue

        if os.path.isfile(file_path):
            content = self.get_contents(file_path)
            if 'keyring' in file_path:
                content = parse_keyring(content)
            try:
                osd_metadata[file_json_key] = int(content)
            except ValueError:
                osd_metadata[file_json_key] = content

    # we must scan the paths again because this might be a temporary mount
    path_mounts = system.get_mounts(paths=True)
    device = path_mounts.get(path)

    # it is possible to have more than one device, pick the first one, and
    # warn that it is possible that more than one device is 'data'
    if not device:
        terminal.error('Unable to detect device mounted for path: %s' % path)
        raise RuntimeError('Cannot activate OSD')
    # ``device`` is known to be non-empty at this point
    osd_metadata['data'] = self.scan_device(device[0])

    return osd_metadata
def enable_systemd_units(self, osd_id, osd_fsid):
    """
    Configure the systemd units involved in running the OSD.

    * unless activation comes from a trigger or systemd handling is being
      skipped, the ``simple`` ceph-volume unit is enabled for ``osd_id`` and
      ``osd_fsid`` and the ceph-disk units are masked
    * unless systemd handling is being skipped, the OSD unit is enabled and
      then started

    :param osd_id: The ID of the OSD
    :param osd_fsid: The FSID of the OSD
    """
    if self.from_trigger or self.skip_systemd:
        terminal.info('Skipping enabling of `simple` systemd unit')
        terminal.info('Skipping masking of ceph-disk systemd units')
    else:
        # scanned and now activated directly: the ceph-volume unit for this
        # OSD takes over, and every ceph-disk unit gets masked
        systemctl.enable_volume(osd_id, osd_fsid, 'simple')
        systemctl.mask_ceph_disk()
        masked_notice = ('All ceph-disk systemd units have been disabled to '
                         'prevent OSDs getting triggered by UDEV events')
        terminal.warning(masked_notice)

    if self.skip_systemd:
        terminal.info(
            'Skipping enabling and starting OSD simple systemd unit because --no-systemd was used'
        )
    else:
        # enable first, then start
        systemctl.enable_osd(osd_id)
        systemctl.start_osd(osd_id)
def exclude_group_options(parser, groups, argv=None):
    """
    ``argparse`` has the ability to check for mutually exclusive options, but
    it only allows a basic XOR behavior: only one flag can be used from
    a defined group of options. This doesn't help when two groups of options
    need to be separated. For example, with filestore and bluestore, neither
    set can be used in conjunction with the other set.

    This helper validator will consume the parser to inspect the group flags,
    and it will group them together from ``groups``. This allows proper error
    reporting, matching each incompatible flag with its group name.

    Incompatible flags are reported as warnings on the terminal, nothing is
    raised and nothing is returned.

    :param parser: The argparse object, once it has configured all flags. It is
                   required to contain the group names being used to validate.
    :param groups: A list of group names (at least two), with the same used for
                   ``add_argument_group``
    :param argv: Consume the args (sys.argv) directly from this argument. When
                 it is ``None`` or empty there is nothing to validate.

    .. note: **Unfortunately** this will not be able to validate correctly when
             using default flags. In the case of filestore vs. bluestore, ceph-volume
             defaults to --bluestore, but we can't check that programmatically, we can
             only parse the flags seen via argv
    """
    # The default for ``argv`` is None, which can't be iterated over
    if not argv:
        return

    # Reduce the parser groups to only the groups we need to intersect
    parser_groups = [g for g in parser._action_groups if g.title in groups]
    # A mapping of the group name to flags/options
    group_flags = {}
    flags_to_verify = []
    for group in parser_groups:
        # option groups may have more than one item in ``option_strings``, this
        # will loop over ``_group_actions`` which contains the
        # ``option_strings``, like ``['--filestore']``
        group_flags[group.title] = [
            option for group_action in group._group_actions
            for option in group_action.option_strings
        ]

    # Gather all the flags present in the groups so that we only check on those.
    for flags in group_flags.values():
        flags_to_verify.extend(flags)

    seen = []
    last_flag = None
    last_group = None
    for flag in argv:
        if flag not in flags_to_verify:
            continue
        for group_name, flags in group_flags.items():
            if flag not in flags:
                continue
            seen.append(group_name)
            # We are mutually excluding groups, so having every group
            # in ``seen`` means incompatible flags were used together
            if len(set(seen)) == len(groups):
                terminal.warning('Incompatible flags were found, some values may get ignored')
                msg = 'Cannot use %s (%s) with %s (%s)' % (
                    last_flag, last_group, flag, group_name
                )
                terminal.warning(msg)
            # only remember the group that actually owns the flag, so that
            # the next report pairs ``last_flag`` with its own group
            last_group = group_name
        last_flag = flag
def wipefs(path):
    """
    Wipe the filesystem signatures from an lv or partition by calling
    ``wipefs --all``, retrying when the command exits with a non-zero status.

    Environment variables supported::

    * ``CEPH_VOLUME_WIPEFS_TRIES``: Defaults to 8
    * ``CEPH_VOLUME_WIPEFS_INTERVAL``: Defaults to 5

    :param path: The lv or partition to wipe
    :raises RuntimeError: If none of the attempts exits with a zero status
    """
    max_attempts = str_to_int(os.environ.get('CEPH_VOLUME_WIPEFS_TRIES', 8))
    pause = str_to_int(os.environ.get('CEPH_VOLUME_WIPEFS_INTERVAL', 5))

    for _ in range(max_attempts):
        _out, _err, returncode = process.call(['wipefs', '--all', path])
        if returncode == 0:
            return
        # the retry is intentionally broad: stderr is not inspected for
        # 'probing initialization failed' so that any other message that
        # needs a retry is not missed
        terminal.warning(
            'failed to wipefs device, will try again to workaround probable race condition'
        )
        time.sleep(pause)

    raise RuntimeError("could not complete wipefs on device: %s" % path)
def setup(name='ceph-volume.log', log_path=None, log_level=None):
    """
    Attach a file handler to the root logger.

    :param name: File name used for the fallback log file in ``/tmp/``
    :param log_path: Path of the log file, defaults to ``conf.log_path``
    :param log_level: Name of a ``logging`` level, defaults to ``"DEBUG"``
    """
    if not log_path:
        log_path = conf.log_path
    # a non-root user calling help (or any other no-sudo-required command)
    # can't write to the default location, this is what gets used instead
    fallback_path = os.path.join('/tmp/', name)
    root = logging.getLogger()

    # The default path is where all ceph log files are, and will get rotated
    # by Ceph's logrotate rules.
    level_name = log_level or "DEBUG"
    level = getattr(logging, level_name.upper())
    root.setLevel(level)

    try:
        handler = logging.FileHandler(log_path)
    except (OSError, IOError) as err:
        terminal.warning("Falling back to /tmp/ for logging. Can't use %s" % log_path)
        terminal.warning(str(err))
        conf.log_path = fallback_path
        handler = logging.FileHandler(fallback_path)

    handler.setLevel(level)
    handler.setFormatter(logging.Formatter(FILE_FORMAT))
    root.addHandler(handler)
def run(command, run_on_host=False, **kw):
    """
    Execute ``command`` with ``subprocess.Popen``, logging stdout and stderr
    as they are produced. No other handling of the output is done.

    The first item of ``command`` is replaced in place with whatever ``which``
    resolves for it.

    :param command: The command to pass in to subprocess.Popen as a list
    :param run_on_host: Passed to ``which``; when set and ``host_rootfs`` is
                        a directory the command is prefixed with ``run_host_cmd``
    :param stop_on_error: If a nonzero exit status is return, it raises a ``RuntimeError``
    :param fail_msg: If a nonzero exit status is returned this message will be included in the log
    :param obfuscate: Passed through to ``obfuscate`` to build the logged message
    :param terminal_logging: Whether output and warnings go to the terminal too
    """
    binary = command.pop(0)
    command.insert(0, which(binary, run_on_host))
    if run_on_host and path.isdir(host_rootfs):
        command = run_host_cmd + command

    # everything consumed here must be out of ``kw`` before it reaches Popen
    raise_on_failure = kw.pop('stop_on_error', True)
    hidden = kw.pop('obfuscate', None)
    failure_note = kw.pop('fail_msg', None)
    to_terminal = kw.pop('terminal_logging', True)

    banner = obfuscate(command, hidden)
    logger.info(banner)
    terminal.write(banner)

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kw
    )

    watched = [process.stdout.fileno(), process.stderr.fileno()]
    running = True
    while running:
        ready, _, _ = select(watched, [], [])
        log_descriptors(ready, process, to_terminal)
        if process.poll() is not None:
            # ensure we do not have anything pending in stdout or stderr
            log_descriptors(ready, process, to_terminal)
            running = False

    returncode = process.wait()
    if returncode == 0:
        return

    msg = "command returned non-zero exit status: %s" % returncode
    if failure_note:
        logger.warning(failure_note)
        if to_terminal:
            terminal.warning(failure_note)
    if raise_on_failure:
        raise RuntimeError(msg)
    if to_terminal:
        terminal.warning(msg)
    logger.warning(msg)
def run(command, **kw):
    """
    Run ``command`` through ``subprocess.Popen`` and log whatever it writes
    to stdout and stderr while it is running.

    The executable (first item of ``command``) is swapped in place for the
    result of ``which``.

    :param command: The command to pass in to subprocess.Popen as a list
    :param stop_on_error: If a nonzero exit status is return, it raises a ``RuntimeError``
    :param fail_msg: If a nonzero exit status is returned this message will be included in the log
    :param obfuscate: Passed through to ``obfuscate`` to build the logged message
    :param terminal_logging: Whether output and warnings go to the terminal too
    """
    command.insert(0, which(command.pop(0)))

    stop_on_error = kw.pop('stop_on_error', True)
    command_msg = obfuscate(command, kw.pop('obfuscate', None))
    fail_msg = kw.pop('fail_msg', None)
    for emit in (logger.info, terminal.write):
        emit(command_msg)
    terminal_logging = kw.pop('terminal_logging', True)

    popen_options = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    process = subprocess.Popen(command, **dict(popen_options, **kw))

    def drain(descriptors):
        # forward whatever is readable to the log (and maybe the terminal)
        log_descriptors(descriptors, process, terminal_logging)

    while True:
        readable = select([process.stdout.fileno(), process.stderr.fileno()], [], [])[0]
        drain(readable)
        if process.poll() is None:
            continue
        # the process is done, make sure nothing is left pending in
        # stdout or stderr
        drain(readable)
        break

    returncode = process.wait()
    failed = returncode != 0
    if failed:
        msg = "command returned non-zero exit status: %s" % returncode
        if fail_msg:
            logger.warning(fail_msg)
            if terminal_logging:
                terminal.warning(fail_msg)
        if stop_on_error:
            raise RuntimeError(msg)
        if terminal_logging:
            terminal.warning(msg)
        logger.warning(msg)
def scan_directory(self, path):
    """
    Build the metadata of an OSD out of the contents of its directory.

    Symlinks are resolved with ``self.scan_device``, directories and binary
    files are ignored and regular files get their contents stored, converted
    to ``int`` when possible. The device mounted at ``path`` ends up in the
    ``data`` key.

    :param path: Directory of the OSD, it must contain a ``keyring`` file
    :returns: A dictionary with the metadata gathered for the OSD
    :raises RuntimeError: If ``keyring`` is missing from ``path`` or no device
                          is found mounted at ``path``
    """
    metadata = {'cluster_name': conf.cluster}
    entries = os.listdir(path)
    if 'keyring' not in entries:
        raise RuntimeError(
            'OSD files not found, required "keyring" file is not present at: %s' % path
        )

    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)

        if os.path.islink(entry_path):
            if not os.path.exists(entry_path):
                msg = 'broken symlink found %s -> %s' % (entry_path, os.path.realpath(entry_path))
                terminal.warning(msg)
                logger.warning(msg)
            else:
                metadata[entry] = self.scan_device(entry_path)

        if os.path.isdir(entry_path):
            continue

        # binary detection goes before reading, so that no data is captured
        # from binary files while actual files can still be read afterwards
        try:
            if system.is_binary(entry_path):
                logger.info('skipping binary file: %s' % entry_path)
                continue
        except IOError:
            logger.exception('skipping due to IOError on file: %s' % entry_path)
            continue

        if not os.path.isfile(entry_path):
            continue

        content = self.get_contents(entry_path)
        if 'keyring' in entry_path:
            content = parse_keyring(content)
        try:
            value = int(content)
        except ValueError:
            value = content
        metadata[entry] = value

    # the mounts are looked up again because this might be a temporary mount
    mounted_devices = system.get_mounts(paths=True).get(path)

    # more than one device is possible, the first one is picked
    if not mounted_devices:
        terminal.error('Unable to detect device mounted for path: %s' % path)
        raise RuntimeError('Cannot activate OSD')

    if len(mounted_devices):
        data_device = mounted_devices[0]
    else:
        data_device = None
    metadata['data'] = self.scan_device(data_device)
    return metadata
def run(command, **kw):
    """
    A real-time-logging implementation of a remote subprocess.Popen call where
    a command is just executed on the remote end and no other handling is done.

    :param command: The command to pass in to the remote subprocess.Popen as a list
    :param stop_on_error: If a nonzero exit status is return, it raises a ``RuntimeError``
    :param stdin: Optional data written to the stdin of the command
    :param terminal_logging: Whether output and warnings go to the terminal too
    """
    stop_on_error = kw.pop('stop_on_error', True)
    command_msg = "Running command: %s" % ' '.join(command)
    stdin = kw.pop('stdin', None)
    logger.info(command_msg)
    terminal.write(command_msg)
    terminal_logging = kw.pop('terminal_logging', True)

    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kw
    )

    # ``communicate()`` can't be used to feed stdin: it consumes all the
    # output, waits for the process and closes stdout/stderr, so nothing
    # would get logged and ``fileno()`` below would fail on the closed pipes.
    # Write the input directly and always close stdin so that the command
    # sees EOF instead of blocking on an open pipe.
    try:
        if stdin:
            process.stdin.write(stdin)
        process.stdin.close()
    except (OSError, IOError):
        # the command may exit without consuming its input
        logger.exception('unable to write to stdin of the command')

    while True:
        reads, _, _ = select(
            [process.stdout.fileno(), process.stderr.fileno()],
            [], []
        )
        log_descriptors(reads, process, terminal_logging)

        if process.poll() is not None:
            # ensure we do not have anything pending in stdout or stderr
            log_descriptors(reads, process, terminal_logging)
            break

    returncode = process.wait()
    if returncode != 0:
        msg = "command returned non-zero exit status: %s" % returncode
        if stop_on_error:
            raise RuntimeError(msg)
        else:
            if terminal_logging:
                terminal.warning(msg)
            logger.warning(msg)
def run(command, **kw):
    """
    A real-time-logging implementation of a remote subprocess.Popen call where
    a command is just executed on the remote end and no other handling is done.

    :param command: The command to pass in to the remote subprocess.Popen as a list
    :param stop_on_error: If a nonzero exit status is return, it raises a ``RuntimeError``
    :param obfuscate: Passed through to ``obfuscate`` to build the logged message
    :param stdin: Optional data written to the stdin of the command
    :param terminal_logging: Whether output and warnings go to the terminal too
    """
    stop_on_error = kw.pop('stop_on_error', True)
    command_msg = obfuscate(command, kw.pop('obfuscate', None))
    stdin = kw.pop('stdin', None)
    logger.info(command_msg)
    terminal.write(command_msg)
    terminal_logging = kw.pop('terminal_logging', True)

    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kw
    )

    # ``communicate()`` can't be used to feed stdin: it consumes all the
    # output, waits for the process and closes stdout/stderr, so nothing
    # would get logged and ``fileno()`` below would fail on the closed pipes.
    # Write the input directly and always close stdin so that the command
    # sees EOF instead of blocking on an open pipe.
    try:
        if stdin:
            process.stdin.write(stdin)
        process.stdin.close()
    except (OSError, IOError):
        # the command may exit without consuming its input
        logger.exception('unable to write to stdin of the command')

    while True:
        reads, _, _ = select(
            [process.stdout.fileno(), process.stderr.fileno()],
            [], []
        )
        log_descriptors(reads, process, terminal_logging)

        if process.poll() is not None:
            # ensure we do not have anything pending in stdout or stderr
            log_descriptors(reads, process, terminal_logging)
            break

    returncode = process.wait()
    if returncode != 0:
        msg = "command returned non-zero exit status: %s" % returncode
        if stop_on_error:
            raise RuntimeError(msg)
        else:
            if terminal_logging:
                terminal.warning(msg)
            logger.warning(msg)
def scan(self, args):
    """
    Scan an OSD, given as a directory or as a device, and persist its
    metadata as JSON (or print it when ``--stdout`` is used).

    A directory is scanned directly. Anything else is treated as a device:
    the path where it is mounted gets scanned, or it gets mounted temporarily
    when it isn't mounted anywhere.

    :param args: The parsed arguments, ``osd_path``, ``stdout`` and ``force``
                 are consumed
    :raises RuntimeError: If the JSON file already exists and neither
                          ``--force`` nor ``--stdout`` was used
    """
    device_mounts = system.get_mounts(devices=True)
    osd_path = None
    logger.info('detecting if argument is a device or a directory: %s', args.osd_path)
    if os.path.isdir(args.osd_path):
        logger.info('will scan directly, path is a directory')
        osd_path = args.osd_path
    else:
        # assume this is a device, check if it is mounted and use that path
        logger.info('path is not a directory, will check if mounted')
        if system.device_is_mounted(args.osd_path):
            logger.info('argument is a device, which is mounted')
            # the lookup may come back empty (``None``) even if the device
            # is reported as mounted, fall back to a temporary mount then
            mounted_osd_paths = device_mounts.get(args.osd_path) or []
            osd_path = mounted_osd_paths[0] if mounted_osd_paths else None

    # argument is not a directory, and it is not a device that is mounted
    # somewhere so temporarily mount it to poke inside, otherwise, scan
    # directly
    if not osd_path:
        logger.info('device is not mounted, will mount it temporarily to scan')
        with system.tmp_mount(args.osd_path) as osd_path:
            osd_metadata = self.scan_directory(osd_path)
    else:
        logger.info('will scan OSD directory at path: %s', osd_path)
        osd_metadata = self.scan_directory(osd_path)

    osd_id = osd_metadata['whoami']
    osd_fsid = osd_metadata['fsid']
    filename = '%s-%s.json' % (osd_id, osd_fsid)
    json_path = os.path.join(self.etc_path, filename)

    if os.path.exists(json_path) and not args.stdout:
        if not args.force:
            raise RuntimeError(
                '--force was not used and OSD metadata file exists: %s' % json_path
            )

    if args.stdout:
        print(json.dumps(osd_metadata, indent=4, sort_keys=True, ensure_ascii=False))
    else:
        with open(json_path, 'w') as fp:
            json.dump(osd_metadata, fp, indent=4, sort_keys=True, ensure_ascii=False)
        terminal.success(
            'OSD %s got scanned and metadata persisted to file: %s' % (
                osd_id,
                json_path
            )
        )
        terminal.success(
            'To take over management of this scanned OSD, and disable ceph-disk and udev, run:'
        )
        terminal.success(' ceph-volume simple activate %s %s' % (osd_id, osd_fsid))

    if not osd_metadata.get('data'):
        # logged once and echoed once to the terminal
        msg = 'Unable to determine device mounted on %s' % args.osd_path
        logger.warning(msg)
        terminal.warning(msg)
        terminal.warning('OSD will not be able to start without this information:')
        terminal.warning(' "data": "/path/to/device",')
def activate_all(self, args):
    """
    Activate every OSD reported by ``direct_report`` whose process is not
    active already.

    :param args: The parsed arguments, passed through to ``self.activate``
    """
    fsid_to_id = {}
    for osd_id, devices in direct_report().items():
        # the metadata for all devices in each OSD will contain the FSID
        # which is required for activation, the first one found is enough
        device_fsids = (device.get('tags', {}).get('ceph.osd_fsid') for device in devices)
        fsid = next((candidate for candidate in device_fsids if candidate), None)
        if fsid:
            fsid_to_id[fsid] = osd_id

    if not fsid_to_id:
        terminal.warning('Was unable to find any OSDs to activate')
        terminal.warning(
            'Verify OSDs are present with "ceph-volume lvm list"')
        return

    for osd_fsid, osd_id in fsid_to_id.items():
        if not systemctl.osd_is_active(osd_id):
            terminal.info('Activating OSD ID %s FSID %s' % (osd_id, osd_fsid))
            self.activate(args, osd_id=osd_id, osd_fsid=osd_fsid)
            continue
        terminal.warning(
            'OSD ID %s FSID %s process is active. Skipping activation' %
            (osd_id, osd_fsid))
def enable_systemd_units(self, osd_id, osd_fsid):
    """
    Take care of the systemd units needed to run the OSD.

    * enables the ``simple`` ceph-volume unit for ``osd_id`` and ``osd_fsid``
      and masks the ceph-disk units, but only when not coming from a trigger
      and systemd is not being skipped
    * enables and starts the OSD unit, unless systemd is being skipped

    :param osd_id: The ID of the OSD
    :param osd_fsid: The FSID of the OSD
    """
    activated_directly = not (self.from_trigger or self.skip_systemd)

    if activated_directly:
        # it was scanned and is now activated directly: the `simple` unit
        # is created and enabled, and any/all ceph-disk units are disabled
        systemctl.enable_volume(osd_id, osd_fsid, 'simple')
        systemctl.mask_ceph_disk()
        terminal.warning(
            ('All ceph-disk systemd units have been disabled to '
             'prevent OSDs getting triggered by UDEV events')
        )
    else:
        for skipped in ('Skipping enabling of `simple` systemd unit',
                        'Skipping masking of ceph-disk systemd units'):
            terminal.info(skipped)

    if self.skip_systemd:
        terminal.info(
            'Skipping enabling and starting OSD simple systemd unit because --no-systemd was used'
        )
        return

    # enabling goes first, starting the OSD is the last step
    systemctl.enable_osd(osd_id)
    systemctl.start_osd(osd_id)
def setup(name='ceph-volume.log', log_path=None):
    """
    Add a ``DEBUG`` level file handler to the root logger.

    :param name: File name used for the fallback log file in ``/tmp/``
    :param log_path: Path of the log file, defaults to ``conf.log_path``
    """
    destination = log_path if log_path else conf.log_path
    # fallback for non-root users calling help or any other command that
    # doesn't require sudo, since writing to the default path fails for them
    fallback = os.path.join('/tmp/', name)

    root_logger = logging.getLogger()
    # The default path is where all ceph log files are, and will get rotated
    # by Ceph's logrotate rules.
    root_logger.setLevel(logging.DEBUG)

    try:
        file_handler = logging.FileHandler(destination)
    except (OSError, IOError) as error:
        terminal.warning("Falling back to /tmp/ for logging. Can't use %s" % destination)
        terminal.warning(str(error))
        conf.log_path = fallback
        file_handler = logging.FileHandler(fallback)

    file_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter(FILE_FORMAT)
    file_handler.setFormatter(formatter)
    root_logger.addHandler(file_handler)
def activate_all(self, args):
    """
    Go through the OSDs listed by ``direct_report`` and activate the ones
    that don't have an active process.

    :param args: The parsed arguments, passed through to ``self.activate``
    """
    report = direct_report()
    pending = {}
    for osd_id in report:
        # all devices of an OSD carry the FSID needed for activation in
        # their metadata, so stop at the first device that has it
        for device in report[osd_id]:
            tags = device.get('tags', {})
            fsid = tags.get('ceph.osd_fsid')
            if not fsid:
                continue
            pending[fsid] = osd_id
            break

    if len(pending) == 0:
        terminal.warning('Was unable to find any OSDs to activate')
        terminal.warning('Verify OSDs are present with "ceph-volume lvm list"')
        return

    for osd_fsid in pending:
        osd_id = pending[osd_fsid]
        identifiers = (osd_id, osd_fsid)
        if systemctl.osd_is_active(osd_id):
            terminal.warning(
                'OSD ID %s FSID %s process is active. Skipping activation' % identifiers
            )
        else:
            terminal.info('Activating OSD ID %s FSID %s' % identifiers)
            self.activate(args, osd_id=osd_id, osd_fsid=osd_fsid)
def activate(self, args):
    """
    Activate an OSD from the JSON metadata produced by a previous scan.

    The data device is mounted at the OSD directory when it isn't mounted
    there already, the journal/block/block.db/block.wal symlinks are
    re-created, and the systemd units are enabled and started.

    :param args: The parsed arguments: ``json_config`` is the path to the
                 JSON file, ``osd_id`` and ``osd_fsid`` are used when the
                 file has no ``whoami`` or ``fsid``
    :raises RuntimeError: If the metadata has no ``uuid`` for the data device
    """
    with open(args.json_config, 'r') as fp:
        osd_metadata = json.load(fp)

    osd_id = osd_metadata.get('whoami', args.osd_id)
    osd_fsid = osd_metadata.get('fsid', args.osd_fsid)

    cluster_name = osd_metadata.get('cluster_name', 'ceph')
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (cluster_name, osd_id)
    data_uuid = osd_metadata.get('data', {}).get('uuid')
    if not data_uuid:
        # report the ID that is actually being activated, which may come
        # from the JSON file and not from the arguments
        raise RuntimeError(
            'Unable to activate OSD %s - no "uuid" key found for data' % osd_id
        )
    data_device = disk.get_device_from_partuuid(data_uuid)
    journal_device = disk.get_device_from_partuuid(osd_metadata.get('journal', {}).get('uuid'))
    block_device = disk.get_device_from_partuuid(osd_metadata.get('block', {}).get('uuid'))
    block_db_device = disk.get_device_from_partuuid(osd_metadata.get('block.db', {}).get('uuid'))
    block_wal_device = disk.get_device_from_partuuid(
        osd_metadata.get('block.wal', {}).get('uuid')
    )

    if not system.device_is_mounted(data_device, destination=osd_dir):
        process.run(['mount', '-v', data_device, osd_dir])

    device_map = {
        'journal': journal_device,
        'block': block_device,
        'block.db': block_db_device,
        'block.wal': block_wal_device
    }

    for name, device in device_map.items():
        if not device:
            continue
        # always re-do the symlink regardless if it exists, so that the journal
        # device path that may have changed can be mapped correctly every time
        destination = os.path.join(osd_dir, name)
        process.run(['ln', '-snf', device, destination])

        # make sure that the journal has proper permissions
        system.chown(device)

    ceph_disk_masked = False
    if not self.systemd:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'simple')

        # disable any/all ceph-disk units
        systemctl.mask_ceph_disk()
        ceph_disk_masked = True

    # enable the OSD
    systemctl.enable_osd(osd_id)

    # start the OSD
    systemctl.start_osd(osd_id)

    terminal.success('Successfully activated OSD %s with FSID %s' % (osd_id, osd_fsid))
    if ceph_disk_masked:
        # only claim that units were disabled when they really were
        terminal.warning(
            ('All ceph-disk systemd units have been disabled to '
             'prevent OSDs getting triggered by UDEV events')
        )
def main(self):
    """
    Parse the arguments of ``ceph-volume simple scan`` and scan every
    requested OSD.

    Without an ``OSD_PATH`` argument the directories of all running OSDs are
    scanned. A path that doesn't belong to an OSD created by ceph-disk is
    skipped with a warning.

    :raises RuntimeError: If a partition is given that is not the ceph data
                          partition
    """
    sub_command_help = dedent("""
    Scan running OSDs, an OSD directory (or data device) for files and configurations
    that will allow to take over the management of the OSD.

    Scanned OSDs will get their configurations stored in
    /etc/ceph/osd/<id>-<fsid>.json

    For an OSD ID of 0 with fsid of ``a9d50838-e823-43d6-b01f-2f8d0a77afc2``
    that could mean a scan command that looks like::

        ceph-volume simple scan /var/lib/ceph/osd/ceph-0

    Which would store the metadata in a JSON file at::

        /etc/ceph/osd/0-a9d50838-e823-43d6-b01f-2f8d0a77afc2.json

    To scan all running OSDs:

        ceph-volume simple scan

    To scan a specific running OSD:

        ceph-volume simple scan /var/lib/ceph/osd/{cluster}-{osd id}

    And to scan a device (mounted or unmounted) that has OSD data in it, for example /dev/sda1

        ceph-volume simple scan /dev/sda1

    Scanning a device or directory that belongs to an OSD not created by ceph-disk will be ignored.
    """)
    parser = argparse.ArgumentParser(
        prog='ceph-volume simple scan',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=sub_command_help,
    )

    parser.add_argument(
        '-f', '--force',
        action='store_true',
        help='If OSD has already been scanned, the JSON file will be overwritten'
    )

    parser.add_argument(
        '--stdout',
        action='store_true',
        help='Do not save to a file, output metadata to stdout'
    )

    parser.add_argument(
        'osd_path',
        metavar='OSD_PATH',
        type=arg_validators.OSDPath(),
        nargs='?',
        default=None,
        help='Path to an existing OSD directory or OSD data partition'
    )

    args = parser.parse_args(self.argv)
    paths = []
    if args.osd_path:
        paths.append(args.osd_path)
    else:
        # nothing was requested explicitly, go for every running OSD
        osd_ids = systemctl.get_running_osd_ids()
        for osd_id in osd_ids:
            paths.append("/var/lib/ceph/osd/{}-{}".format(
                conf.cluster,
                osd_id,
            ))

    # Capture some environment status, so that it can be reused all over
    self.device_mounts = system.get_mounts(devices=True)
    self.path_mounts = system.get_mounts(paths=True)

    for path in paths:
        # ``self.scan`` reads the path to work on from ``args``
        args.osd_path = path
        device = Device(args.osd_path)
        if device.is_partition:
            if device.ceph_disk.type != 'data':
                label = device.ceph_disk.partlabel
                msg = 'Device must be the ceph data partition, but PARTLABEL reported: "%s"' % label
                raise RuntimeError(msg)

        self.encryption_metadata = encryption.legacy_encrypted(args.osd_path)
        self.is_encrypted = self.encryption_metadata['encrypted']

        device = Device(self.encryption_metadata['device'])
        if not device.is_ceph_disk_member:
            terminal.warning("Ignoring %s because it's not a ceph-disk created osd." % path)
        else:
            self.scan(args)
def scan(self, args):
    """
    Scan an OSD, given as a directory or as a device, and persist its
    metadata as JSON (or print it when ``--stdout`` is used).

    A directory is scanned directly. Anything else is treated as a device:
    the path where it is mounted gets scanned, or it gets mounted temporarily
    when it isn't mounted anywhere. Encrypted OSDs go through
    ``self.scan_encrypted`` instead of ``self.scan_directory``.

    :param args: The parsed arguments, ``osd_path``, ``stdout`` and ``force``
                 are consumed
    :raises RuntimeError: If an encrypted device that is not mounted has no
                          lockbox, or if the JSON file already exists and
                          neither ``--force`` nor ``--stdout`` was used
    """
    osd_path = None
    logger.info('detecting if argument is a device or a directory: %s', args.osd_path)
    if os.path.isdir(args.osd_path):
        logger.info('will scan directly, path is a directory')
        osd_path = args.osd_path
    else:
        # assume this is a device, check if it is mounted and use that path
        logger.info('path is not a directory, will check if mounted')
        if system.device_is_mounted(args.osd_path):
            logger.info('argument is a device, which is mounted')
            # the lookup may come back empty (``None``) even if the device
            # is reported as mounted, handle it as not mounted then
            mounted_osd_paths = self.device_mounts.get(args.osd_path) or []
            osd_path = mounted_osd_paths[0] if mounted_osd_paths else None

    # argument is not a directory, and it is not a device that is mounted
    # somewhere so temporarily mount it to poke inside, otherwise, scan
    # directly
    if not osd_path:
        # check if we have an encrypted device first, so that we can poke at
        # the lockbox instead
        if self.is_encrypted:
            if not self.encryption_metadata.get('lockbox'):
                raise RuntimeError(
                    'Lockbox partition was not found for device: %s' % args.osd_path
                )
            osd_metadata = self.scan_encrypted()
        else:
            logger.info('device is not mounted, will mount it temporarily to scan')
            with system.tmp_mount(args.osd_path) as osd_path:
                osd_metadata = self.scan_directory(osd_path)
    else:
        if self.is_encrypted:
            logger.info('will scan encrypted OSD directory at path: %s', osd_path)
            osd_metadata = self.scan_encrypted(osd_path)
        else:
            logger.info('will scan OSD directory at path: %s', osd_path)
            osd_metadata = self.scan_directory(osd_path)

    osd_id = osd_metadata['whoami']
    osd_fsid = osd_metadata['fsid']
    filename = '%s-%s.json' % (osd_id, osd_fsid)
    json_path = os.path.join(self.etc_path, filename)

    if os.path.exists(json_path) and not args.stdout:
        if not args.force:
            raise RuntimeError(
                '--force was not used and OSD metadata file exists: %s' % json_path
            )

    if args.stdout:
        print(json.dumps(osd_metadata, indent=4, sort_keys=True, ensure_ascii=False))
    else:
        with open(json_path, 'w') as fp:
            json.dump(osd_metadata, fp, indent=4, sort_keys=True, ensure_ascii=False)
            fp.write(os.linesep)
        terminal.success(
            'OSD %s got scanned and metadata persisted to file: %s' % (
                osd_id,
                json_path
            )
        )
        terminal.success(
            'To take over management of this scanned OSD, and disable ceph-disk and udev, run:'
        )
        terminal.success(' ceph-volume simple activate %s %s' % (osd_id, osd_fsid))

    if not osd_metadata.get('data'):
        # logged once and echoed once to the terminal
        msg = 'Unable to determine device mounted on %s' % args.osd_path
        logger.warning(msg)
        terminal.warning(msg)
        terminal.warning('OSD will not be able to start without this information:')
        terminal.warning(' "data": "/path/to/device",')
def activate(self, args):
    """
    Activate an OSD from the JSON metadata produced by a previous scan.

    The required devices are validated, the secrets needed for encrypted OSDs
    are collected, the data device is mounted at the OSD directory when it
    isn't mounted there already, the journal/block/block.db/block.wal
    symlinks are re-created, and the systemd units are enabled and started.

    :param args: The parsed arguments: ``json_config`` is the path to the
                 JSON file, ``osd_id`` and ``osd_fsid`` are used when the
                 file has no ``whoami`` or ``fsid``
    :raises RuntimeError: If the metadata has no ``uuid`` for the data device
    """
    with open(args.json_config, 'r') as fp:
        osd_metadata = json.load(fp)

    # Make sure that required devices are configured
    self.validate_devices(osd_metadata)

    osd_id = osd_metadata.get('whoami', args.osd_id)
    osd_fsid = osd_metadata.get('fsid', args.osd_fsid)
    data_uuid = osd_metadata.get('data', {}).get('uuid')
    conf.cluster = osd_metadata.get('cluster_name', 'ceph')
    if not data_uuid:
        # report the ID that is actually being activated, which may come
        # from the JSON file and not from the arguments
        raise RuntimeError(
            'Unable to activate OSD %s - no "uuid" key found for data' % osd_id)

    # Encryption detection, and capturing of the keys to decrypt
    self.is_encrypted = osd_metadata.get('encrypted', False)
    self.encryption_type = osd_metadata.get('encryption_type')
    if self.is_encrypted:
        lockbox_secret = osd_metadata.get('lockbox.keyring')
        # write the keyring always so that we can unlock
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        # Store the secret around so that the decrypt method can reuse
        raw_dmcrypt_secret = encryption_utils.get_dmcrypt_key(
            osd_id, osd_fsid)
        # Note how both these calls need b64decode. For some reason, the
        # way ceph-disk creates these keys, it stores them in the monitor
        # *undecoded*, requiring this decode call again. The lvm side of
        # encryption doesn't need it, so we are assuming here that anything
        # that `simple` scans, will come from ceph-disk and will need this
        # extra decode call here
        self.dmcrypt_secret = base64.b64decode(raw_dmcrypt_secret)

    cluster_name = osd_metadata.get('cluster_name', 'ceph')
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (cluster_name, osd_id)

    # XXX there is no support for LVM here
    data_device = self.get_device(data_uuid)
    journal_device = self.get_device(
        osd_metadata.get('journal', {}).get('uuid'))
    block_device = self.get_device(
        osd_metadata.get('block', {}).get('uuid'))
    block_db_device = self.get_device(
        osd_metadata.get('block.db', {}).get('uuid'))
    block_wal_device = self.get_device(
        osd_metadata.get('block.wal', {}).get('uuid'))

    if not system.device_is_mounted(data_device, destination=osd_dir):
        process.run(['mount', '-v', data_device, osd_dir])

    device_map = {
        'journal': journal_device,
        'block': block_device,
        'block.db': block_db_device,
        'block.wal': block_wal_device
    }

    for name, device in device_map.items():
        if not device:
            continue
        # always re-do the symlink regardless if it exists, so that the journal
        # device path that may have changed can be mapped correctly every time
        destination = os.path.join(osd_dir, name)
        process.run(['ln', '-snf', device, destination])

        # make sure that the journal has proper permissions
        system.chown(device)

    ceph_disk_masked = False
    if not self.systemd:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'simple')

        # disable any/all ceph-disk units
        systemctl.mask_ceph_disk()
        ceph_disk_masked = True

    # enable the OSD
    systemctl.enable_osd(osd_id)

    # start the OSD
    systemctl.start_osd(osd_id)

    terminal.success('Successfully activated OSD %s with FSID %s' % (osd_id, osd_fsid))
    if ceph_disk_masked:
        # only claim that units were disabled when they really were
        terminal.warning(('All ceph-disk systemd units have been disabled to '
                          'prevent OSDs getting triggered by UDEV events'))
def activate(self, args):
    """
    Activate one previously scanned OSD from its JSON metadata file.

    The metadata is read from ``args.json_config``. The partitions recorded
    in it are resolved with ``disk.get_device_from_partuuid``, the data
    device is mounted on ``/var/lib/ceph/osd/{cluster}-{id}`` when not
    already mounted there, the journal/block symlinks are re-created, and the
    OSD unit is enabled and started.

    :param args: parsed arguments; ``json_config`` is required, ``osd_id`` and
                 ``osd_fsid`` are only fallbacks for metadata lacking
                 ``whoami``/``fsid``
    :raises RuntimeError: if the metadata has no ``uuid`` for ``data``, or if
                          no device can be resolved for that uuid
    """
    with open(args.json_config, 'r') as fp:
        osd_metadata = json.load(fp)

    osd_id = osd_metadata.get('whoami', args.osd_id)
    osd_fsid = osd_metadata.get('fsid', args.osd_fsid)

    cluster_name = osd_metadata.get('cluster_name', 'ceph')
    osd_dir = '/var/lib/ceph/osd/%s-%s' % (cluster_name, osd_id)
    data_uuid = osd_metadata.get('data', {}).get('uuid')
    if not data_uuid:
        # report the resolved ID; ``args.osd_id`` may be None while the
        # metadata itself carries ``whoami``
        raise RuntimeError(
            'Unable to activate OSD %s - no "uuid" key found for data' % osd_id
        )

    data_device = disk.get_device_from_partuuid(data_uuid)
    if not data_device:
        # the symlink loop below already treats a falsy result as "no such
        # device"; fail clearly here instead of handing it to ``mount``
        raise RuntimeError(
            'Unable to activate OSD %s - no device found for data uuid %s'
            % (osd_id, data_uuid)
        )
    journal_device = disk.get_device_from_partuuid(
        osd_metadata.get('journal', {}).get('uuid')
    )
    block_device = disk.get_device_from_partuuid(
        osd_metadata.get('block', {}).get('uuid')
    )
    block_db_device = disk.get_device_from_partuuid(
        osd_metadata.get('block.db', {}).get('uuid')
    )
    block_wal_device = disk.get_device_from_partuuid(
        osd_metadata.get('block.wal', {}).get('uuid')
    )

    if not system.device_is_mounted(data_device, destination=osd_dir):
        process.run(['sudo', 'mount', '-v', data_device, osd_dir])

    device_map = {
        'journal': journal_device,
        'block': block_device,
        'block.db': block_db_device,
        'block.wal': block_wal_device,
    }

    for name, device in device_map.items():
        if not device:
            continue
        # always re-do the symlink regardless if it exists, so that the journal
        # device path that may have changed can be mapped correctly every time
        destination = os.path.join(osd_dir, name)
        process.run(['sudo', 'ln', '-snf', device, destination])

        # make sure that the journal has proper permissions
        system.chown(device)

    if not self.systemd:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'simple')

        # disable any/all ceph-disk units
        systemctl.mask_ceph_disk()

    # enable the OSD
    systemctl.enable_osd(osd_id)

    # start the OSD
    systemctl.start_osd(osd_id)

    if not self.systemd:
        terminal.success('Successfully activated OSD %s with FSID %s' % (osd_id, osd_fsid))
        terminal.warning(
            ('All ceph-disk systemd units have been disabled to '
             'prevent OSDs getting triggered by UDEV events')
        )
def main(self):
    """
    Parse ``self.argv`` and activate the selected OSD(s).

    With no arguments the help text is printed and nothing else happens.
    Otherwise the OSD is selected by ``ID`` and ``FSID``, by ``--file``, or
    every ``*.json`` config in the JSON directory is activated with ``--all``.
    The JSON directory is ``/etc/ceph/osd/`` unless overridden by the
    ``CEPH_VOLUME_SIMPLE_JSON_DIR`` environment variable.

    :raises RuntimeError: when neither ``--file`` nor ``--all`` is given and
                          ID or FSID is missing, or when the expected JSON
                          config does not exist. With ``--all``, a
                          RuntimeError from a single OSD is reported as a
                          warning and the remaining configs are still tried.
    """
    sub_command_help = dedent("""
    Activate OSDs by mounting devices previously configured to their
    appropriate destination::

        ceph-volume simple activate {ID} {FSID}

    Or using a JSON file directly::

        ceph-volume simple activate --file /etc/ceph/osd/{ID}-{FSID}.json

    The OSD must have been "scanned" previously (see ``ceph-volume simple
    scan``), so that all needed OSD device information and metadata exist.

    A previously scanned OSD would exist like::

        /etc/ceph/osd/{ID}-{FSID}.json


    Environment variables supported:

    CEPH_VOLUME_SIMPLE_JSON_DIR: Directory location for scanned OSD JSON configs
    """)
    parser = argparse.ArgumentParser(
        prog='ceph-volume simple activate',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=sub_command_help,
    )
    parser.add_argument(
        'osd_id',
        metavar='ID',
        nargs='?',
        help='The ID of the OSD, usually an integer, like 0'
    )
    parser.add_argument(
        'osd_fsid',
        metavar='FSID',
        nargs='?',
        help='The FSID of the OSD, similar to a SHA1'
    )
    parser.add_argument(
        '--all',
        help='Activate all OSDs with a OSD JSON config',
        action='store_true',
        default=False,
    )
    parser.add_argument(
        '--file',
        help='The path to a JSON file, from a scanned OSD'
    )
    parser.add_argument(
        '--no-systemd',
        dest='skip_systemd',
        action='store_true',
        help='Skip creating and enabling systemd units and starting OSD services',
    )
    if not self.argv:
        print(sub_command_help)
        return
    args = parser.parse_args(self.argv)
    if not args.file and not args.all:
        # both positionals are needed to build '{ID}-{FSID}.json' below, so
        # reject the call as soon as either one is missing
        if not args.osd_id or not args.osd_fsid:
            terminal.error('ID and FSID are required to find the right OSD to activate')
            terminal.error('from a scanned OSD location in /etc/ceph/osd/')
            raise RuntimeError('Unable to activate without both ID and FSID')
    self.skip_systemd = args.skip_systemd
    # don't allow a CLI flag to specify the JSON dir, because that might
    # implicitly indicate that it would be possible to activate a json file
    # at a non-default location which would not work at boot time if the
    # custom location is not exposed through an ENV var
    json_dir = os.environ.get('CEPH_VOLUME_SIMPLE_JSON_DIR', '/etc/ceph/osd/')
    if args.all:
        if args.file or args.osd_id:
            mlogger.warn('--all was passed, ignoring --file and ID/FSID arguments')
        json_configs = glob.glob('{}/*.json'.format(json_dir))
        for json_config in json_configs:
            mlogger.info('activating OSD specified in {}'.format(json_config))
            args.json_config = json_config
            try:
                self.activate(args)
            except RuntimeError as e:
                # exceptions have no ``.message`` attribute on Python 3;
                # str() yields the message on both Python 2 and 3
                terminal.warning(str(e))
    else:
        if args.file:
            json_config = args.file
        else:
            json_config = os.path.join(json_dir, '%s-%s.json' % (args.osd_id, args.osd_fsid))
        if not os.path.exists(json_config):
            raise RuntimeError('Expected JSON config path not found: %s' % json_config)
        args.json_config = json_config
        self.activate(args)
def activate(self, args):
    """
    Activate one previously scanned OSD from its JSON metadata file.

    The metadata is read from ``args.json_config``. The devices recorded in
    it are resolved with ``self.get_device``, the data device is mounted on
    ``/var/lib/ceph/osd/{cluster}-{id}`` when not already mounted there, the
    journal/block symlinks are re-created, and the systemd units are enabled
    and started.

    When the metadata is flagged as encrypted, the lockbox keyring is written
    and the dmcrypt secret is stored on ``self.dmcrypt_secret``.

    :param args: parsed arguments; ``json_config`` is required, ``osd_id`` and
                 ``osd_fsid`` are only fallbacks for metadata lacking
                 ``whoami``/``fsid``
    :raises RuntimeError: if the metadata has no ``uuid`` for ``data``, or if
                          no device can be resolved for that uuid
    """
    with open(args.json_config, 'r') as fp:
        osd_metadata = json.load(fp)

    # Make sure that required devices are configured
    self.validate_devices(osd_metadata)

    osd_id = osd_metadata.get('whoami', args.osd_id)
    osd_fsid = osd_metadata.get('fsid', args.osd_fsid)
    data_uuid = osd_metadata.get('data', {}).get('uuid')
    # resolved once: feeds both the global configuration and the OSD directory
    cluster_name = osd_metadata.get('cluster_name', 'ceph')
    conf.cluster = cluster_name
    if not data_uuid:
        # report the resolved ID; ``args.osd_id`` is None when the OSD was
        # selected with ``--file`` or ``--all``
        raise RuntimeError(
            'Unable to activate OSD %s - no "uuid" key found for data' % osd_id
        )

    # Encryption detection, and capturing of the keys to decrypt
    self.is_encrypted = osd_metadata.get('encrypted', False)
    self.encryption_type = osd_metadata.get('encryption_type')
    if self.is_encrypted:
        lockbox_secret = osd_metadata.get('lockbox.keyring')
        # write the keyring always so that we can unlock
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        # Store the secret around so that the decrypt method can reuse
        raw_dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        # Note how both these calls need b64decode. For some reason, the
        # way ceph-disk creates these keys, it stores them in the monitor
        # *undecoded*, requiring this decode call again. The lvm side of
        # encryption doesn't need it, so we are assuming here that anything
        # that `simple` scans, will come from ceph-disk and will need this
        # extra decode call here
        self.dmcrypt_secret = base64.b64decode(raw_dmcrypt_secret)

    osd_dir = '/var/lib/ceph/osd/%s-%s' % (cluster_name, osd_id)

    # XXX there is no support for LVM here
    data_device = self.get_device(data_uuid)
    if not data_device:
        # the symlink loop below already treats a falsy result as "no such
        # device"; fail clearly here instead of handing it to ``mount``
        raise RuntimeError(
            'Unable to activate OSD %s - no device found for data uuid %s'
            % (osd_id, data_uuid)
        )
    journal_device = self.get_device(osd_metadata.get('journal', {}).get('uuid'))
    block_device = self.get_device(osd_metadata.get('block', {}).get('uuid'))
    block_db_device = self.get_device(osd_metadata.get('block.db', {}).get('uuid'))
    block_wal_device = self.get_device(osd_metadata.get('block.wal', {}).get('uuid'))

    if not system.device_is_mounted(data_device, destination=osd_dir):
        process.run(['mount', '-v', data_device, osd_dir])

    device_map = {
        'journal': journal_device,
        'block': block_device,
        'block.db': block_db_device,
        'block.wal': block_wal_device,
    }

    for name, device in device_map.items():
        if not device:
            continue
        # always re-do the symlink regardless if it exists, so that the journal
        # device path that may have changed can be mapped correctly every time
        destination = os.path.join(osd_dir, name)
        process.run(['ln', '-snf', device, destination])

        # make sure that the journal has proper permissions
        system.chown(device)

    if not self.systemd:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'simple')

        # disable any/all ceph-disk units
        systemctl.mask_ceph_disk()

    # enable the OSD
    systemctl.enable_osd(osd_id)

    # start the OSD
    systemctl.start_osd(osd_id)

    terminal.success('Successfully activated OSD %s with FSID %s' % (osd_id, osd_fsid))
    if not self.systemd:
        # only claim the units were disabled when mask_ceph_disk() really ran
        terminal.warning(
            ('All ceph-disk systemd units have been disabled to '
             'prevent OSDs getting triggered by UDEV events')
        )