def needs_restart():
    '''Return True if Cassandra is not running or needs to be restarted.'''
    if helpers.is_decommissioned():
        # Decommissioned nodes are never restarted. They remain up
        # telling everyone they are decommissioned.
        helpers.status_set('blocked', 'Decommissioned node')
        return False

    if not helpers.is_cassandra_running():
        if helpers.is_bootstrapped():
            helpers.status_set('waiting', 'Waiting for permission to start')
        else:
            helpers.status_set('waiting',
                               'Waiting for permission to bootstrap')
        return True

    config = hookenv.config()

    # If our IP address has changed, we need to restart.
    if config.changed('unit_private_ip'):
        helpers.status_set('waiting', 'IP address changed. '
                           'Waiting for restart permission.')
        return True

    # If the directory paths have changed, we need to migrate data
    # during a restart.
    storage = relations.StorageRelation()
    if storage.needs_remount():
        helpers.status_set(hookenv.status_get(),
                           'New mounts. Waiting for restart permission')
        return True

    # If any of these config items changed, a restart is required.
    for key in RESTART_REQUIRED_KEYS:
        if config.changed(key):
            hookenv.log('{} changed. Restart required.'.format(key))
    for key in RESTART_REQUIRED_KEYS:
        if config.changed(key):
            helpers.status_set(hookenv.status_get(),
                               'Config changes. '
                               'Waiting for restart permission.')
            return True

    # If we have new seeds, we should restart.
    new_seeds = helpers.get_seed_ips()
    if config.get('configured_seeds') != sorted(new_seeds):
        old_seeds = set(config.previous('configured_seeds') or [])
        changed = old_seeds.symmetric_difference(new_seeds)
        # We don't care about the local node in the changes.
        changed.discard(hookenv.unit_private_ip())
        if changed:
            helpers.status_set(hookenv.status_get(),
                               'Updated seeds {!r}. '
                               'Waiting for restart permission.'
                               ''.format(new_seeds))
            return True

    hookenv.log('Restart not required')
    return False

def reset_auth_keyspace_replication():
    # Cassandra requires you to manually set the replication factor of
    # the system_auth keyspace, to ensure availability and redundancy.
    # The recommendation is to set the replication factor so that every
    # node has a copy.
    ep = reactive.endpoint_from_name('cluster')
    num_nodes = len(ep.all_bootstrapped_units) + 1
    datacenter = cassandra.config()['datacenter']
    with cassandra.connect() as session:
        strategy_opts = cassandra.get_auth_keyspace_replication(session)
        rf = int(strategy_opts.get(datacenter, -1))
        hookenv.log('Current system_auth replication strategy is {!r}'
                    .format(strategy_opts))
        if rf != num_nodes:
            strategy_opts['class'] = 'NetworkTopologyStrategy'
            strategy_opts[datacenter] = num_nodes
            if 'replication_factor' in strategy_opts:
                del strategy_opts['replication_factor']
            hookenv.log('New system_auth replication strategy is {!r}'
                        .format(strategy_opts))
            status, msg = hookenv.status_get()
            helpers.status_set(status,
                               'Updating system_auth rf to {!r}'
                               .format(strategy_opts))
            cassandra.set_auth_keyspace_replication(session, strategy_opts)
            if rf < num_nodes:
                # Increasing rf, need to run repair.
                cassandra.repair_auth_keyspace()
            helpers.status_set(status, msg)
    reactive.set_flag('cassandra.authkeyspace.done')

def repair_auth_keyspace():
    # Repair takes a long time, and may need to be retried due to 'snapshot
    # creation' errors, but should certainly complete within an hour since
    # the keyspace is tiny.
    status_set(hookenv.status_get(), 'Repairing system_auth keyspace')
    nodetool('repair', 'system_auth', timeout=3600)

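# The comment in repair_auth_keyspace() above notes that repairs may need to
# be retried after transient 'snapshot creation' errors. A minimal retry
# sketch under that assumption; the wrapper name, the exception type, and
# the backoff are illustrative, not from the source:
import subprocess
import time


def repair_auth_keyspace_with_retries(attempts=3):
    for attempt in range(1, attempts + 1):
        try:
            repair_auth_keyspace()
            return
        except subprocess.CalledProcessError:
            if attempt == attempts:
                raise
            time.sleep(30 * attempt)  # simple linear backoff between tries
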
def _fetch_oracle_jre():
    config = hookenv.config()
    url = config.get('private_jre_url', None)
    if url and config.get('retrieved_jre', None) != url:
        filename = os.path.join(hookenv.charm_dir(), 'lib',
                                url.split('/')[-1])
        if not filename.endswith('-linux-x64.tar.gz'):
            helpers.status_set('blocked',
                               'Invalid private_jre_url {}'.format(url))
            raise SystemExit(0)
        helpers.status_set(hookenv.status_get(), 'Downloading Oracle JRE')
        hookenv.log('Oracle JRE URL is {}'.format(url))
        urllib.request.urlretrieve(url, filename)
        config['retrieved_jre'] = url

    pattern = os.path.join(hookenv.charm_dir(), 'lib',
                           'server-jre-?u*-linux-x64.tar.gz')
    tarballs = glob.glob(pattern)
    if not (url or tarballs):
        helpers.status_set('blocked',
                           'private_jre_url not set and no local tarballs.')
        raise SystemExit(0)
    elif not tarballs:
        helpers.status_set('blocked',
                           'Oracle JRE tarball not found ({})'.format(pattern))
        raise SystemExit(0)

    # Latest tarball by filename/version num. Let's hope they don't hit
    # 99 (currently at 76).
    tarball = sorted(tarballs)[-1]
    return tarball

def config_changed():
    if not conf.changed('server_port') and not conf.changed('RAM_MAX'):
        return

    log('ftb-infinity: config_changed')
    cur_status = status_get()
    status_set('maintenance', 'configuring')

    port_changed = conf.changed('server_port')
    ram_changed = conf.changed('RAM_MAX')

    # Let's suppose java will rewrite server.properties on exit
    started = is_state(CHARM_STATE_STARTED)
    if started:
        service_stop(CHARM_NAME)
        sleep(2)

    if port_changed:
        close_port(conf.previous('server_port'))
        ftb_config_server()
    if ram_changed:
        ftb_systemd_install()

    if started:
        service_start(CHARM_NAME)
        if port_changed:
            open_port(conf['server_port'])

    # restore state
    status_set(cur_status[0], cur_status[1])

def run_solr(zookeeper, java):
    hookenv.open_port('8983')
    solrcloud = hookenv.config()['solrcloud']
    charmstatus = status_get()
    log("CHARMSTAT " + charmstatus[0])
    log("CHARMSTAT " + charmstatus[1])
    # TODO if status returns error then unset solr.running and status_set
    # TODO detect if ZK info changes and restart solr nodes
    if solrcloud and (charmstatus[1] != 'Solr Cloud Running'):
        zklist = ''
        for zk_unit in zookeeper.zookeepers():
            zklist += add_zookeeper(zk_unit['host'], zk_unit['port'])
        # Strip the trailing separator left by add_zookeeper().
        zklist = zklist[:-1]
        call(['su', 'solr', '-c', '/opt/solr/bin/solr stop'])
        check_output([
            'su', 'solr', '-c',
            '/opt/solr/bin/solr start -c -p 8983 -z ' + zklist
        ])
        status_set('active', 'Solr Cloud Running')
        set_state('solrcloud.running')
    elif not solrcloud and (charmstatus[1] != 'Solr Running(No Cloud)'):
        call(['su', 'solr', '-c', '/opt/solr/bin/solr stop'])
        check_output(['su', 'solr', '-c', '/opt/solr/bin/solr start'])
        status_set('active', 'Solr Running(No Cloud)')
        set_state('solr.running')

def restart_scheduler():
    prev_state, prev_msg = hookenv.status_get()
    hookenv.status_set('maintenance', 'Restarting kube-scheduler')
    host.service_restart('snap.kube-scheduler.daemon')
    hookenv.status_set(prev_state, prev_msg)
    remove_state('kube-scheduler.do-restart')
    set_state('kube-scheduler.started')

def configure(force=False):
    config = hookenv.config()

    def changed(key):
        return force or config.changed(key)

    if config.changed('proxy') and config.get('proxy'):
        shutil.rmtree('/opt/collector-web')
        install()
        if hookenv.status_get() == 'blocked':
            return  # We're blocked again

    with open('/etc/graphite/local_settings.py', 'r+') as f:
        contents = f.read()
        contents = re.sub(r'#TIME_ZONE = .*', "TIME_ZONE = 'Etc/UTC'",
                          contents)
        f.seek(0, 0)
        f.truncate()
        f.write(contents)

    if 'juju-secret' not in config:
        return

    ini_path = '/opt/collector-web/production.ini'
    with open(ini_path, 'r') as f:
        ini = f.read()

    api_addresses = os.getenv('JUJU_API_ADDRESSES')
    if api_addresses:
        juju_api = 'wss://%s' % api_addresses.split()[0]
        ini = re.sub(r'juju.api.endpoint =.*',
                     'juju.api.endpoint = %s' % juju_api, ini)

    ini = re.sub(
        r'graphite.url =.*',
        'graphite.url = http://%s:9001' % hookenv.unit_get('public-address'),
        ini)

    # Fall back to an empty string when an option is unset.
    if changed('juju-user'):
        ini = re.sub(r'juju.api.user =.*',
                     'juju.api.user = %s' % (config.get('juju-user') or ''),
                     ini)
    if changed('juju-secret'):
        ini = re.sub(
            r'juju.api.secret =.*',
            'juju.api.secret = %s' % (config.get('juju-secret') or ''),
            ini)
    if changed('publish-url'):
        ini = re.sub(
            r'publish.url =.*',
            'publish.url = %s' % (config.get('publish-url') or ''),
            ini)

    with open(ini_path, 'w') as f:
        f.write(ini)

    host.service_restart('collectorweb')
    hookenv.status_set('active',
                       'Ready http://%s:9000' % hookenv.unit_public_ip())

def show_pf(port_forward):
    state, msg = hookenv.status_get()
    msg = re.sub(r' pf:".*"', '', msg)
    msg += ' pf:"'
    for forward in port_forward.forwards:
        msg += '{}:{}->{} '.format(forward['public_ip'],
                                   forward['public_port'],
                                   forward['private_port'])
    msg += '"'
    hookenv.status_set(state, msg)

def set_active():
    # If we got this far, the unit is active. Update the status if it is
    # not already active. We don't do this unconditionally, as the charm
    # may be active but doing stuff, like active but waiting for restart
    # permission.
    if hookenv.status_get() != 'active':
        helpers.set_active()
    else:
        hookenv.log('Unit status already active', DEBUG)

def check_optional_relations(configs):
    required_interfaces = {}
    if enable_nova_metadata():
        required_interfaces['neutron-plugin-api'] = ['neutron-plugin-api']
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return 'unknown', 'No optional relations'

def assess_status():
    """Assess status of current unit"""
    # check to see if the unit is paused.
    application_version_set(get_upstream_version(VERSION_PACKAGE))
    if is_unit_upgrading_set():
        status_set("blocked",
                   "Ready for do-release-upgrade and reboot. "
                   "Set complete when finished.")
        return
    if is_unit_paused_set():
        status_set('maintenance',
                   "Paused. Use 'resume' action to resume normal service.")
        return
    # Check for mon relation
    if len(relation_ids('mon')) < 1:
        status_set('blocked', 'Missing relation: monitor')
        return

    # Check for monitors with presented addresses
    # Check for bootstrap key presentation
    monitors = get_mon_hosts()
    if len(monitors) < 1 or not get_conf('osd_bootstrap_key'):
        status_set('waiting', 'Incomplete relation: monitor')
        return

    # Check for vault
    if use_vaultlocker():
        if not relation_ids('secrets-storage'):
            status_set('blocked', 'Missing relation: vault')
            return
        if not vaultlocker.vault_relation_complete():
            status_set('waiting', 'Incomplete relation: vault')
            return

    # Check for OSD device creation parity i.e. at least some devices
    # must have been presented and used for this charm to be operational
    (prev_status, prev_message) = status_get()
    running_osds = ceph.get_running_osds()
    if not prev_message.startswith('Non-pristine'):
        if not running_osds:
            status_set('blocked',
                       'No block devices detected using current '
                       'configuration')
        else:
            status_set('active',
                       'Unit is ready ({} OSD)'.format(len(running_osds)))
    else:
        pristine = True
        osd_journals = get_journal_devices()
        for dev in list(set(ceph.unmounted_disks()) - set(osd_journals)):
            if (not ceph.is_active_bluestore_device(dev) and
                    not ceph.is_pristine_disk(dev)):
                pristine = False
                break
        if pristine:
            status_set('active',
                       'Unit is ready ({} OSD)'.format(len(running_osds)))

def status_set(state, message):
    '''DEPRECATED, set the unit's workload status.

    Set state == None to keep the same state and just change the message.
    '''
    if state is None:
        state = hookenv.status_get()[0]
        if state not in ('active', 'waiting', 'blocked'):
            state = 'maintenance'  # Guess
    status.status_set(state, message)

def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids("ha"):
        required_interfaces["ha"] = ["cluster"]
        try:
            get_hacluster_config()
        except:
            return ("blocked",
                    "hacluster missing configuration: "
                    "vip, vip_iface, vip_cidr")
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return "unknown", "No optional relations"

def status_set(state, message):
    '''Set the unit's workload status.

    Set state == None to keep the same state and just change the message.
    '''
    if state is None:
        state = hookenv.status_get()[0]
        if state == 'unknown':
            state = 'maintenance'  # Guess
    if state in ('error', 'blocked'):
        lvl = hookenv.WARNING
    else:
        lvl = hookenv.INFO
    hookenv.status_set(state, message)
    hookenv.log('{}: {}'.format(state, message), lvl)

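# Usage sketch for the wrapper above (illustrative, not from the source):
# passing state=None updates the message while preserving the unit's current
# workload state, which is the pattern long-running hooks rely on.
status_set('maintenance', 'Migrating data to the new mount point')
# ... long-running work ...
status_set(None, 'Data migration complete')  # state stays 'maintenance'
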
def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids('ha'):
        required_interfaces['ha'] = ['cluster']
        try:
            get_hacluster_config()
        except:
            return ('blocked',
                    'hacluster missing configuration: '
                    'vip, vip_iface, vip_cidr')
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return 'unknown', 'No optional relations'

def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids('ceph'):
        required_interfaces['storage-backend'] = ['ceph']
    if relation_ids('neutron-plugin'):
        required_interfaces['neutron-plugin'] = ['neutron-plugin']
    if relation_ids('shared-db') or relation_ids('pgsql-db'):
        required_interfaces['database'] = ['shared-db', 'pgsql-db']
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return 'unknown', 'No optional relations'

def assess_status():
    """Assess status of current unit"""
    # check to see if the unit is paused.
    application_version_set(get_upstream_version(VERSION_PACKAGE))
    if is_unit_upgrading_set():
        status_set("blocked",
                   "Ready for do-release-upgrade and reboot. "
                   "Set complete when finished.")
        return
    if is_unit_paused_set():
        status_set('maintenance',
                   "Paused. Use 'resume' action to resume normal service.")
        return
    # Check for mon relation
    if len(relation_ids('mon')) < 1:
        status_set('blocked', 'Missing relation: monitor')
        return

    # Check for monitors with presented addresses
    # Check for bootstrap key presentation
    monitors = get_mon_hosts()
    if len(monitors) < 1 or not get_conf('osd_bootstrap_key'):
        status_set('waiting', 'Incomplete relation: monitor')
        return

    # Check for vault
    if use_vaultlocker():
        if not relation_ids('secrets-storage'):
            status_set('blocked', 'Missing relation: vault')
            return
        if not vaultlocker.vault_relation_complete():
            status_set('waiting', 'Incomplete relation: vault')
            return

    # Check for OSD device creation parity i.e. at least some devices
    # must have been presented and used for this charm to be operational
    (prev_status, prev_message) = status_get()
    running_osds = ceph.get_running_osds()
    if not prev_message.startswith('Non-pristine'):
        if not running_osds:
            status_set('blocked',
                       'No block devices detected using current '
                       'configuration')
        else:
            status_set('active',
                       'Unit is ready ({} OSD)'.format(len(running_osds)))

def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids('ha'):
        required_interfaces['ha'] = ['cluster']
        try:
            get_hacluster_config()
        except:
            return ('blocked',
                    'hacluster missing configuration: '
                    'vip, vip_iface, vip_cidr')
    if cmp_pkgrevno('radosgw', '0.55') >= 0 and \
            relation_ids('identity-service'):
        required_interfaces['identity'] = ['identity-service']
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return 'unknown', 'No optional relations'

def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids('ha'):
        required_interfaces['ha'] = ['cluster']
        try:
            get_hacluster_config()
        except:
            return ('blocked',
                    'hacluster missing configuration: '
                    'vip, vip_iface, vip_cidr')
    if relation_ids('quantum-network-service'):
        required_interfaces['quantum'] = ['quantum-network-service']
    if relation_ids('cinder-volume-service'):
        required_interfaces['cinder'] = ['cinder-volume-service']
    if relation_ids('neutron-api'):
        required_interfaces['neutron-api'] = ['neutron-api']
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return 'unknown', 'No optional relations'

def status_set(state, message):
    """Set the unit's workload status.

    Set state == None to keep the same state and just change the message.

    Toggles the workloadstatus.{maintenance,blocked,waiting,active,unknown}
    states.
    """
    if state is None:
        state = hookenv.status_get()[0]
        if state == "unknown":
            state = "maintenance"  # Guess
    assert state in VALID_STATES, "Invalid state {}".format(state)
    if state in ("error", "blocked"):
        lvl = WARNING
    else:
        lvl = INFO
    hookenv.status_set(state, message)
    hookenv.log("{}: {}".format(state, message), lvl)
    initialize_workloadstatus_state(state)

def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids("ha"):
        required_interfaces["ha"] = ["cluster"]
        try:
            get_hacluster_config()
        except:
            return ("blocked",
                    "hacluster missing configuration: "
                    "vip, vip_iface, vip_cidr")
    if relation_ids("ceph") or relation_ids("object-store"):
        required_interfaces["storage-backend"] = ["ceph", "object-store"]
    if relation_ids("amqp"):
        required_interfaces["messaging"] = ["amqp"]
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return "unknown", "No optional relations"

def check_optional_relations(configs):
    required_interfaces = {}
    if relation_ids('ha'):
        required_interfaces['ha'] = ['cluster']
        try:
            get_hacluster_config()
        except:
            return ('blocked',
                    'hacluster missing configuration: '
                    'vip, vip_iface, vip_cidr')
    if relation_ids('ceph') or relation_ids('object-store'):
        required_interfaces['storage-backend'] = ['ceph', 'object-store']
    if relation_ids('amqp'):
        required_interfaces['messaging'] = ['amqp']
    if required_interfaces:
        set_os_workload_status(configs, required_interfaces)
        return status_get()
    else:
        return 'unknown', 'No optional relations'

def register_to_cloud():
    """
    Implementation of the `register-to-cloud` action.

    This action reverts the `remove-from-cloud` action. It starts the
    nova-compute system service, which triggers its re-registration in
    the cloud.
    """
    log("Starting nova-compute service", DEBUG)
    service_resume('nova-compute')
    current_status = status_get()
    if current_status[0] == WORKLOAD_STATES.BLOCKED.value and \
            current_status[1] == UNIT_REMOVED_MSG:
        status_set(WORKLOAD_STATES.ACTIVE, 'Unit is ready')

    nova_compute_hooks.update_status()
    function_set({
        'command': 'openstack compute service list',
        'message': "Nova compute service started. It should get registered "
                   "with the cloud controller in a short time. Use the "
                   "'openstack' command to verify that it's registered."
    })

def status_set(status, msg):
    if not status:
        status = hookenv.status_get()[0]
    hookenv.log('{}: {}'.format(status, msg))
    hookenv.status_set(status, msg)

def set_auth_keyspace_replication(session, settings):
    # Live operation, so keep status the same.
    status_set(hookenv.status_get(),
               'Updating system_auth rf to {!r}'.format(settings))
    statement = 'ALTER KEYSPACE system_auth WITH REPLICATION = %s'
    query(session, statement, ConsistencyLevel.ALL, (settings,))

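# For illustration only: with a settings dict such as the one below (the
# datacenter name 'dc1' and factor 3 are assumptions, not from the source),
# the driver renders the statement above as roughly
#
#   ALTER KEYSPACE system_auth
#       WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'dc1': 3}
#
# so system_auth ends up with one replica per node once the factor matches
# the node count, which is what reset_auth_keyspace_replication() targets.
example_settings = {'class': 'NetworkTopologyStrategy', 'dc1': 3}
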
def test_status_get(self, check_output):
    check_output.return_value = 'active\n'
    result = hookenv.status_get()
    self.assertEqual(result, 'active')
    check_output.assert_called_with(['status-get'], universal_newlines=True)

def test_status_get_nostatus(self, check_output):
    check_output.side_effect = OSError(2, 'fail')
    result = hookenv.status_get()
    self.assertEqual(result, 'unknown')
    check_output.assert_called_with(['status-get'], universal_newlines=True)

def restart_apiserver():
    prev_state, prev_msg = hookenv.status_get()
    hookenv.status_set('maintenance', 'Restarting kube-apiserver')
    host.service_restart('snap.kube-apiserver.daemon')
    hookenv.status_set(prev_state, prev_msg)

def update_status():
    (status, message) = status_get()
    # If the status is blocked, rerun config-changed to see
    # if we are now unblocked.
    if status == status_blocked:
        config_changed()

def update_charm_status(update_config=True, force=False):
    def _render_config(ctx=None, do_check=True):
        if not ctx:
            ctx = get_context()
        changed = render_and_check(ctx, "controller.conf",
                                   "/etc/contrailctl/controller.conf",
                                   do_check)
        return (force or changed)

    update_config_func = _render_config if update_config else None
    result = check_run_prerequisites(CONTAINER_NAME, CONFIG_NAME,
                                     update_config_func, SERVICES_TO_CHECK)

    # hack for 4.1: fat containers do not call provision_control
    _, message = status_get()
    identity = json_loads(config.get("auth_info"), dict())
    if (identity and 'contrail-control' in message and
            '(No BGP configuration for self)' in message):
        try:
            ip = get_ip()
            bgp_asn = '64512'
            # register control node to config api server (no auth)
            cmd = [
                '/usr/share/contrail-utils/provision_control.py',
                '--api_server_ip', ip,
                '--router_asn', bgp_asn,
                '--admin_user', identity.get("keystone_admin_user"),
                '--admin_password', identity.get("keystone_admin_password"),
                '--admin_tenant_name', identity.get("keystone_admin_tenant")
            ]
            docker_utils.docker_exec(CONTAINER_NAME, cmd, shell=True)
            # register control node as a BGP speaker without md5 (no auth)
            cmd = [
                '/usr/share/contrail-utils/provision_control.py',
                '--api_server_ip', ip,
                '--router_asn', bgp_asn,
                '--host_name', gethostname(),
                '--host_ip', ip,
                '--oper', 'add',
                '--admin_user', identity.get("keystone_admin_user"),
                '--admin_password', identity.get("keystone_admin_password"),
                '--admin_tenant_name', identity.get("keystone_admin_tenant")
            ]
            docker_utils.docker_exec(CONTAINER_NAME, cmd, shell=True)
            # wait a bit
            time.sleep(8)
            update_services_status(CONTAINER_NAME, SERVICES_TO_CHECK)
        except Exception as e:
            log("Can't provision control: {}".format(e), level=ERROR)

    # hack for contrail-api, which is started at an inappropriate moment
    # relative to keystone
    if (identity and 'contrail-api' in message and
            '(Generic Connection:Keystone[] connection down)' in message):
        try:
            cmd = ['systemctl', 'restart', 'contrail-api']
            docker_utils.docker_exec(CONTAINER_NAME, cmd, shell=True)
            # wait a bit
            time.sleep(8)
            update_services_status(CONTAINER_NAME, SERVICES_TO_CHECK)
        except Exception as e:
            log("Can't restart contrail-api: {}".format(e), level=ERROR)

    if not result:
        return

    ctx = get_context()
    missing_relations = []
    if not ctx.get("db_user"):
        # NOTE: Charms don't allow deploying cassandra in AllowAll mode
        missing_relations.append("contrail-controller-cluster")
    if not ctx.get("analytics_servers"):
        missing_relations.append("contrail-analytics")
    if get_ip() not in ctx.get("controller_servers"):
        missing_relations.append("contrail-cluster")
    if missing_relations:
        status_set('blocked',
                   'Missing relations: ' + ', '.join(missing_relations))
        return
    if not ctx.get("cloud_orchestrator"):
        status_set('blocked', 'Missing cloud orchestrator info in relations.')
        return
    if not ctx.get("rabbitmq_password"):
        status_set('blocked', 'Missing RabbitMQ info in external relations.')
        return
    if not ctx.get("keystone_ip"):
        status_set('blocked',
                   'Missing auth info in relation with contrail-auth.')
        return
    # TODO: what should happen if the relation departed?
    _render_config(ctx, do_check=False)
    run_container(CONTAINER_NAME, ctx.get("cloud_orchestrator"))

def restart_controller_manager():
    prev_state, prev_msg = hookenv.status_get()
    hookenv.status_set('maintenance', 'Restarting kube-controller-manager')
    host.service_restart('snap.kube-controller-manager.daemon')
    hookenv.status_set(prev_state, prev_msg)

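# The restart_scheduler, restart_apiserver, and restart_controller_manager
# functions above share a save/restore status pattern. A possible refactor
# as a context manager; a sketch only, the helper name is hypothetical and
# not part of the source:
from contextlib import contextmanager


@contextmanager
def maintenance_status(message):
    """Show a temporary maintenance status, then restore the previous one."""
    prev_state, prev_msg = hookenv.status_get()
    hookenv.status_set('maintenance', message)
    try:
        yield
    finally:
        hookenv.status_set(prev_state, prev_msg)

# Usage:
#     with maintenance_status('Restarting kube-scheduler'):
#         host.service_restart('snap.kube-scheduler.daemon')
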
def status_get():
    """Returns (workload_status, message) for this unit."""
    return hookenv.status_get()

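# Why keep a one-line wrapper like the one above? It gives tests a
# module-local seam to patch instead of monkey-patching charmhelpers
# globally. A minimal sketch; the module name `charm_status` is an
# assumption for illustration:
from unittest import mock

import charm_status

with mock.patch.object(charm_status, 'status_get',
                       return_value=('active', 'Unit ready')):
    assert charm_status.status_get() == ('active', 'Unit ready')
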
def assess_status():
    """Assess status of current unit"""
    # check to see if the unit is paused.
    application_version_set(get_upstream_version(VERSION_PACKAGE))
    if is_unit_upgrading_set():
        status_set("blocked",
                   "Ready for do-release-upgrade and reboot. "
                   "Set complete when finished.")
        return
    if is_unit_paused_set():
        status_set('maintenance',
                   "Paused. Use 'resume' action to resume normal service.")
        return
    # Check for mon relation
    if len(relation_ids('mon')) < 1:
        status_set('blocked', 'Missing relation: monitor')
        return

    # Check for monitors with presented addresses
    # Check for bootstrap key presentation
    monitors = get_mon_hosts()
    if len(monitors) < 1 or not get_conf('osd_bootstrap_key'):
        status_set('waiting', 'Incomplete relation: monitor')
        return

    # Check for vault
    if use_vaultlocker():
        if not relation_ids('secrets-storage'):
            status_set('blocked', 'Missing relation: vault')
            return
        try:
            if not vaultlocker.vault_relation_complete():
                status_set('waiting', 'Incomplete relation: vault')
                return
        except Exception as e:
            status_set('blocked', "Warning: couldn't verify vault relation")
            log("Exception when verifying vault relation - maybe it was "
                "offline?:\n{}".format(str(e)))
            log("Traceback: {}".format(traceback.format_exc()))

    # Check for OSD device creation parity i.e. at least some devices
    # must have been presented and used for this charm to be operational
    (prev_status, prev_message) = status_get()
    running_osds = ceph.get_running_osds()
    if not prev_message.startswith('Non-pristine'):
        if not running_osds:
            status_set('blocked',
                       'No block devices detected using current '
                       'configuration')
        else:
            status_set('active',
                       'Unit is ready ({} OSD)'.format(len(running_osds)))
    else:
        pristine = True
        osd_journals = get_journal_devices()
        for dev in list(set(ceph.unmounted_disks()) - set(osd_journals)):
            if (not ceph.is_active_bluestore_device(dev) and
                    not ceph.is_pristine_disk(dev)):
                pristine = False
                break
        if pristine:
            status_set('active',
                       'Unit is ready ({} OSD)'.format(len(running_osds)))

    try:
        get_bdev_enable_discard()
    except ValueError as ex:
        status_set('blocked', str(ex))

    try:
        bluestore_compression = ch_context.CephBlueStoreCompressionContext()
        bluestore_compression.validate()
    except ValueError as e:
        status_set('blocked', 'Invalid configuration: {}'.format(str(e)))