def assess_status(): '''Assess status of current unit''' application_version_set(get_upstream_version(VERSION_PACKAGE)) # Check that the no-bootstrap config option is set in conjunction with # having the bootstrap-source relation established if not config('no-bootstrap') and is_relation_made('bootstrap-source'): status_set( 'blocked', 'Cannot join the bootstrap-source relation when ' 'no-bootstrap is False') return moncount = int(config('monitor-count')) units = get_peer_units() # not enough peers and mon_count > 1 if len(units.keys()) < moncount: status_set( 'blocked', 'Insufficient peer units to bootstrap' ' cluster (require {})'.format(moncount)) return # mon_count > 1, peers, but no ceph-public-address ready = sum(1 for unit_ready in units.values() if unit_ready) if ready < moncount: status_set('waiting', 'Peer units detected, waiting for addresses') return # active - bootstrapped + quorum status check if ceph.is_bootstrapped() and ceph.is_quorum(): status_set('active', 'Unit is ready and clustered') else: # Unit should be running and clustered, but no quorum # TODO: should this be blocked or waiting? status_set('blocked', 'Unit not clustered (no quorum)')
def config_changed(): # Get the cfg object so we can see if the no-bootstrap value has changed # and triggered this hook invocation cfg = config() if config('prefer-ipv6'): assert_charm_supports_ipv6() check_for_upgrade() log('Monitor hosts are ' + repr(get_mon_hosts())) sysctl_dict = config('sysctl') if sysctl_dict: create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-charm.conf') if relations_of_type('nrpe-external-master'): update_nrpe_config() if is_leader(): if not config('no-bootstrap'): if not leader_get('fsid') or not leader_get('monitor-secret'): if config('fsid'): fsid = config('fsid') else: fsid = "{}".format(uuid.uuid1()) if config('monitor-secret'): mon_secret = config('monitor-secret') else: mon_secret = "{}".format(ceph.generate_monitor_secret()) status_set('maintenance', 'Creating FSID and Monitor Secret') opts = { 'fsid': fsid, 'monitor-secret': mon_secret, } log("Settings for the cluster are: {}".format(opts)) leader_set(opts) elif cfg.changed('no-bootstrap') and \ is_relation_made('bootstrap-source'): # User changed the no-bootstrap config option, we're the leader, # and the bootstrap-source relation has been made. The charm should # be in a blocked state indicating that the no-bootstrap option # must be set. This block is invoked when the user is trying to # get out of that scenario by enabling no-bootstrap. bootstrap_source_relation_changed() elif leader_get('fsid') is None or leader_get('monitor-secret') is None: log('still waiting for leader to setup keys') status_set('waiting', 'Waiting for leader to setup keys') sys.exit(0) emit_cephconf() # Support use of single node ceph if (not ceph.is_bootstrapped() and int(config('monitor-count')) == 1 and is_leader()): status_set('maintenance', 'Bootstrapping single Ceph MON') ceph.bootstrap_monitor_cluster(leader_get('monitor-secret')) ceph.wait_for_bootstrap() if cmp_pkgrevno('ceph', '12.0.0') >= 0: status_set('maintenance', 'Bootstrapping single Ceph MGR') ceph.bootstrap_manager()
def assess_status(): '''Assess status of current unit''' application_version_set(get_upstream_version(VERSION_PACKAGE)) if is_unit_upgrading_set(): status_set( "blocked", "Ready for do-release-upgrade and reboot. " "Set complete when finished.") return # Check that the no-bootstrap config option is set in conjunction with # having the bootstrap-source relation established if not config('no-bootstrap') and is_relation_made('bootstrap-source'): status_set( 'blocked', 'Cannot join the bootstrap-source relation when ' 'no-bootstrap is False') return moncount = int(config('monitor-count')) units = get_peer_units() # not enough peers and mon_count > 1 if len(units.keys()) < moncount: status_set( 'blocked', 'Insufficient peer units to bootstrap' ' cluster (require {})'.format(moncount)) return # mon_count > 1, peers, but no ceph-public-address ready = sum(1 for unit_ready in units.values() if unit_ready) if ready < moncount: status_set('waiting', 'Peer units detected, waiting for addresses') return configured_rbd_features = config('default-rbd-features') if has_rbd_mirrors() and configured_rbd_features: if add_rbd_mirror_features( configured_rbd_features) != configured_rbd_features: # The configured RBD features bitmap does not contain the features # required for RBD Mirroring status_set( 'blocked', 'Configuration mismatch: RBD Mirroring ' 'enabled but incorrect value set for ' '``default-rbd-features``') return # active - bootstrapped + quorum status check if ceph.is_bootstrapped() and ceph.is_quorum(): expected_osd_count = config('expected-osd-count') or 3 if sufficient_osds(expected_osd_count): status_set('active', 'Unit is ready and clustered') else: status_set( 'waiting', 'Monitor bootstrapped but waiting for number of' ' OSDs to reach expected-osd-count ({})'.format( expected_osd_count)) else: # Unit should be running and clustered, but no quorum # TODO: should this be blocked or waiting? status_set('blocked', 'Unit not clustered (no quorum)')
def prepare_disks_and_activate(): # NOTE: vault/vaultlocker preflight check vault_kv = vaultlocker.VaultKVContext(vaultlocker.VAULTLOCKER_BACKEND) context = vault_kv() if use_vaultlocker() and not vault_kv.complete: log('Deferring OSD preparation as vault not ready', level=DEBUG) return elif use_vaultlocker() and vault_kv.complete: log('Vault ready, writing vaultlocker configuration', level=DEBUG) vaultlocker.write_vaultlocker_conf(context) osd_journal = get_journal_devices() if not osd_journal.isdisjoint(set(get_devices())): raise ValueError('`osd-journal` and `osd-devices` options must not' 'overlap.') log("got journal devs: {}".format(osd_journal), level=DEBUG) # pre-flight check of eligible device pristinity devices = get_devices() # if a device has been previously touched we need to consider it as # non-pristine. If it needs to be re-processed it has to be zapped # via the respective action which also clears the unitdata entry. db = kv() touched_devices = db.get('osd-devices', []) devices = [dev for dev in devices if dev not in touched_devices] log('Skipping osd devices previously processed by this unit: {}'.format( touched_devices)) # filter osd-devices that are file system paths devices = [dev for dev in devices if dev.startswith('/dev')] # filter osd-devices that does not exist on this unit devices = [dev for dev in devices if os.path.exists(dev)] # filter osd-devices that are already mounted devices = [dev for dev in devices if not is_device_mounted(dev)] # filter osd-devices that are active bluestore devices devices = [ dev for dev in devices if not ceph.is_active_bluestore_device(dev) ] log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG) if not all(ceph.is_pristine_disk(dev) for dev in devices): status_set( 'blocked', 'Non-pristine devices detected, consult ' '`list-disks`, `zap-disk` and `blacklist-*` actions.') return if ceph.is_bootstrapped(): log('ceph bootstrapped, rescanning disks') emit_cephconf() for dev in get_devices(): ceph.osdize(dev, config('osd-format'), osd_journal, config('ignore-device-errors'), config('osd-encrypt'), config('bluestore'), config('osd-encrypt-keymanager')) # Make it fast! if config('autotune'): ceph.tune_dev(dev) ceph.start_osds(get_devices())
def mon_relation(): if leader_get('monitor-secret') is None: log('still waiting for leader to setup keys') status_set('waiting', 'Waiting for leader to setup keys') return emit_cephconf() moncount = int(config('monitor-count')) if len(get_mon_hosts()) >= moncount: if not ceph.is_bootstrapped(): status_set('maintenance', 'Bootstrapping MON cluster') # the following call raises an exception # if it can't add the keyring try: ceph.bootstrap_monitor_cluster(leader_get('monitor-secret')) except FileNotFoundError as e: # NOQA -- PEP8 is still PY2 log("Couldn't bootstrap the monitor yet: {}".format(str(e))) exit(0) ceph.wait_for_bootstrap() ceph.wait_for_quorum() if cmp_pkgrevno('ceph', '12.0.0') >= 0: status_set('maintenance', 'Bootstrapping Ceph MGR') ceph.bootstrap_manager() # If we can and want to if is_leader() and config('customize-failure-domain'): # But only if the environment supports it if os.environ.get('JUJU_AVAILABILITY_ZONE'): cmds = [ "ceph osd getcrushmap -o /tmp/crush.map", "crushtool -d /tmp/crush.map| " "sed 's/step chooseleaf firstn 0 type host/step " "chooseleaf firstn 0 type rack/' > " "/tmp/crush.decompiled", "crushtool -c /tmp/crush.decompiled -o /tmp/crush.map", "crushtool -i /tmp/crush.map --test", "ceph osd setcrushmap -i /tmp/crush.map" ] for cmd in cmds: try: subprocess.check_call(cmd, shell=True) except subprocess.CalledProcessError as e: log("Failed to modify crush map:", level='error') log("Cmd: {}".format(cmd), level='error') log("Error: {}".format(e.output), level='error') break else: log("Your Juju environment doesn't" "have support for Availability Zones") notify_osds() notify_radosgws() notify_client() else: log('Not enough mons ({}), punting.'.format(len(get_mon_hosts())))
def prepare_disks_and_activate(): osd_journal = get_journal_devices() check_overlap(osd_journal, set(get_devices())) log("got journal devs: {}".format(osd_journal), level=DEBUG) already_zapped = read_zapped_journals() non_zapped = osd_journal - already_zapped for journ in non_zapped: ceph.maybe_zap_journal(journ) write_zapped_journals(osd_journal) if ceph.is_bootstrapped(): log('ceph bootstrapped, rescanning disks') emit_cephconf() for dev in get_devices(): ceph.osdize(dev, config('osd-format'), osd_journal, config('osd-reformat'), config('ignore-device-errors'), config('osd-encrypt'), config('bluestore')) # Make it fast! if config('autotune'): ceph.tune_dev(dev) ceph.start_osds(get_devices())
def check_for_upgrade(): if not ceph.is_bootstrapped(): log("Ceph is not bootstrapped, skipping upgrade checks.") return c = hookenv.config() old_version = ceph.resolve_ceph_version(c.previous('source') or 'distro') log('old_version: {}'.format(old_version)) new_version = ceph.resolve_ceph_version(hookenv.config('source') or 'distro') log('new_version: {}'.format(new_version)) # May be in a previous upgrade that was failed if the directories # still need an ownership update. Check this condition. resuming_upgrade = ceph.dirs_need_ownership_update('osd') if old_version == new_version and not resuming_upgrade: log("No new ceph version detected, skipping upgrade.", DEBUG) return if (ceph.UPGRADE_PATHS.get(old_version) == new_version) or\ resuming_upgrade: if old_version == new_version: log('Attempting to resume possibly failed upgrade.', INFO) else: log("{} to {} is a valid upgrade path. Proceeding.".format( old_version, new_version)) emit_cephconf(upgrading=True) ceph.roll_osd_cluster(new_version=new_version, upgrade_key='osd-upgrade') emit_cephconf(upgrading=False) else: # Log a helpful error message log("Invalid upgrade path from {} to {}. " "Valid paths are: {}".format(old_version, new_version, ceph.pretty_print_upgrade_paths()))
def check_for_upgrade(): if not ceph.is_bootstrapped(): log("Ceph is not bootstrapped, skipping upgrade checks.") return c = hookenv.config() old_version = ceph.resolve_ceph_version(c.previous('source') or 'distro') log('old_version: {}'.format(old_version)) # Strip all whitespace new_version = ceph.resolve_ceph_version(hookenv.config('source')) log('new_version: {}'.format(new_version)) if (old_version in ceph.UPGRADE_PATHS and new_version == ceph.UPGRADE_PATHS[old_version]): log("{} to {} is a valid upgrade path. Proceeding.".format( old_version, new_version)) ceph.roll_monitor_cluster(new_version=new_version, upgrade_key='admin') else: # Log a helpful error message log("Invalid upgrade path from {} to {}. " "Valid paths are: {}".format(old_version, new_version, ceph.pretty_print_upgrade_paths()))
def check_for_upgrade(): if not ceph.is_bootstrapped(): log("Ceph is not bootstrapped, skipping upgrade checks.") return c = hookenv.config() old_version = ceph.resolve_ceph_version(c.previous('source') or 'distro') log('old_version: {}'.format(old_version)) # Strip all whitespace new_version = ceph.resolve_ceph_version(hookenv.config('source')) old_version_os = get_os_codename_install_source( c.previous('source') or 'distro') new_version_os = get_os_codename_install_source(hookenv.config('source')) log('new_version: {}'.format(new_version)) if (old_version in ceph.UPGRADE_PATHS and new_version == ceph.UPGRADE_PATHS[old_version]): log("{} to {} is a valid upgrade path. Proceeding.".format( old_version, new_version)) ceph.roll_monitor_cluster(new_version=new_version, upgrade_key='admin') elif (old_version == new_version and old_version_os < new_version_os): # See LP: #1778823 add_source(hookenv.config('source'), hookenv.config('key')) log(("The installation source has changed yet there is no new major " "version of Ceph in this new source. As a result no package " "upgrade will take effect. Please upgrade manually if you need " "to."), level=INFO) else: # Log a helpful error message log("Invalid upgrade path from {} to {}. " "Valid paths are: {}".format(old_version, new_version, ceph.pretty_print_upgrade_paths()), level=ERROR)
def prepare_disks_and_activate(): # NOTE: vault/vaultlocker preflight check vault_kv = vaultlocker.VaultKVContext(vaultlocker.VAULTLOCKER_BACKEND) context = vault_kv() if use_vaultlocker() and not vault_kv.complete: log('Deferring OSD preparation as vault not ready', level=DEBUG) return elif use_vaultlocker() and vault_kv.complete: log('Vault ready, writing vaultlocker configuration', level=DEBUG) vaultlocker.write_vaultlocker_conf(context) osd_journal = get_journal_devices() if not osd_journal.isdisjoint(set(get_devices())): raise ValueError('`osd-journal` and `osd-devices` options must not' 'overlap.') log("got journal devs: {}".format(osd_journal), level=DEBUG) # pre-flight check of eligible device pristinity devices = get_devices() # if a device has been previously touched we need to consider it as # non-pristine. If it needs to be re-processed it has to be zapped # via the respective action which also clears the unitdata entry. db = kv() touched_devices = db.get('osd-devices', []) devices = [dev for dev in devices if dev not in touched_devices] log('Skipping osd devices previously processed by this unit: {}' .format(touched_devices)) # filter osd-devices that are file system paths devices = [dev for dev in devices if dev.startswith('/dev')] # filter osd-devices that does not exist on this unit devices = [dev for dev in devices if os.path.exists(dev)] # filter osd-devices that are already mounted devices = [dev for dev in devices if not is_device_mounted(dev)] # filter osd-devices that are active bluestore devices devices = [dev for dev in devices if not ceph.is_active_bluestore_device(dev)] log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG) if not all(ceph.is_pristine_disk(dev) for dev in devices): status_set('blocked', 'Non-pristine devices detected, consult ' '`list-disks`, `zap-disk` and `blacklist-*` actions.') return if ceph.is_bootstrapped(): log('ceph bootstrapped, rescanning disks') emit_cephconf() bluestore = use_bluestore() ceph.udevadm_settle() for dev in get_devices(): ceph.osdize(dev, config('osd-format'), osd_journal, config('ignore-device-errors'), config('osd-encrypt'), bluestore, config('osd-encrypt-keymanager')) # Make it fast! if config('autotune'): ceph.tune_dev(dev) ceph.start_osds(get_devices()) # Notify MON cluster as to how many OSD's this unit bootstrapped # into the cluster for r_id in relation_ids('mon'): relation_set( relation_id=r_id, relation_settings={ 'bootstrapped-osds': len(db.get('osd-devices', [])) } )
def config_changed(): # Get the cfg object so we can see if the no-bootstrap value has changed # and triggered this hook invocation cfg = config() if config('prefer-ipv6'): assert_charm_supports_ipv6() check_for_upgrade() log('Monitor hosts are ' + repr(get_mon_hosts())) sysctl_dict = config('sysctl') if sysctl_dict: create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-charm.conf') if relations_of_type('nrpe-external-master'): update_nrpe_config() if is_leader(): if not config('no-bootstrap'): if not leader_get('fsid') or not leader_get('monitor-secret'): fsid = "{}".format(uuid.uuid1()) if config('monitor-secret'): mon_secret = config('monitor-secret') else: mon_secret = "{}".format(ceph.generate_monitor_secret()) opts = { 'fsid': fsid, 'monitor-secret': mon_secret, } try: leader_set(opts) status_set('maintenance', 'Created FSID and Monitor Secret') log("Settings for the cluster are: {}".format(opts)) except Exception as e: # we're probably not the leader an exception occured # let's log it anyway. log("leader_set failed: {}".format(str(e))) elif (cfg.changed('no-bootstrap') and is_relation_made('bootstrap-source')): # User changed the no-bootstrap config option, we're the leader, # and the bootstrap-source relation has been made. The charm should # be in a blocked state indicating that the no-bootstrap option # must be set. This block is invoked when the user is trying to # get out of that scenario by enabling no-bootstrap. bootstrap_source_relation_changed() # unconditionally verify that the fsid and monitor-secret are set now # otherwise we exit until a leader does this. if leader_get('fsid') is None or leader_get('monitor-secret') is None: log('still waiting for leader to setup keys') status_set('waiting', 'Waiting for leader to setup keys') return emit_cephconf() # Support use of single node ceph if (not ceph.is_bootstrapped() and int(config('monitor-count')) == 1 and is_leader()): status_set('maintenance', 'Bootstrapping single Ceph MON') # the following call raises an exception if it can't add the keyring try: ceph.bootstrap_monitor_cluster(leader_get('monitor-secret')) except FileNotFoundError as e: # NOQA -- PEP8 is still PY2 log("Couldn't bootstrap the monitor yet: {}".format(str(e))) return ceph.wait_for_bootstrap() if cmp_pkgrevno('ceph', '12.0.0') >= 0: status_set('maintenance', 'Bootstrapping single Ceph MGR') ceph.bootstrap_manager() # Update client relations notify_client()