class SlurmLoginCharm(CharmBase):
    """Operator charm that installs slurm and configures slurmrestd.

    The charm tracks two flags in stored state: ``slurm_installed`` (set by
    the install hook) and ``config_available`` (set from outside through
    :meth:`set_config_available`). The slurm config is only rendered once
    both flags are true.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Set stored-state defaults, build helpers and observe events."""
        super().__init__(*args)

        self._stored.set_default(
            slurm_installed=False,
            config_available=False,
        )

        self.slurm_manager = SlurmManager(self, "slurmrestd")
        self._slurmrestd = SlurmrestdRequires(self, 'slurmrestd')

        # (event source, handler) pairs, observed in this order.
        observed = (
            (self.on.install, self._on_install),
            (self.on.upgrade_charm, self._on_upgrade),
            (
                self._slurmrestd.on.config_available,
                self._on_check_status_and_write_config,
            ),
            (
                self._slurmrestd.on.config_unavailable,
                self._on_check_status_and_write_config,
            ),
        )
        for source, callback in observed:
            self.framework.observe(source, callback)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self.slurm_manager.install()
        self.unit.status = ActiveStatus("slurm installed")
        self._stored.slurm_installed = True

    def _on_upgrade(self, event):
        """Delegate the charm upgrade to the slurm manager."""
        self.slurm_manager.upgrade()

    def _on_check_status_and_write_config(self, event):
        """Render the slurm config once all prerequisites are present.

        When slurm is not installed or no config is available the unit is
        set to a blocked status and the event is deferred.
        """
        installed = self._stored.slurm_installed
        have_config = self._stored.config_available
        logger.debug("##### inside check status and write config ######")

        if installed and have_config:
            logger.debug("##### STATUS CONFIRMED ######")
            # Cast the relation-provided config to a plain dict.
            config = dict(self._slurmrestd.get_slurm_config())
            logger.debug(config)
            self.slurm_manager.render_config_and_restart(config)
            self.unit.status = ActiveStatus("Slurmrestd Available")
            return

        # A missing config takes precedence over a missing installation
        # when choosing the status message.
        if have_config:
            message = "SLURM NOT INSTALLED"
        else:
            message = "NEED RELATION TO SLURM-CONFIGURATOR"
        self.unit.status = BlockedStatus(message)
        event.defer()

    def set_config_available(self, boolean):
        """Set self._stored.config_available."""
        self._stored.config_available = boolean
class SlurmConfiguratorCharm(CharmBase):
    """Facilitate slurm configuration operations.

    The charm gathers information from the slurmctld, slurmdbd and slurmd
    relations plus optional addons (influxdb, elasticsearch, nhc,
    prolog/epilog), assembles a single ``slurm_config`` dict and publishes
    it on the slurmctld, slurmd and (when available) slurmrestd relations.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Init charm, _stored defaults, interfaces and observe events."""
        super().__init__(*args)

        self._stored.set_default(
            default_partition=str(),
            munge_key=str(),
            slurm_installed=False,
            slurmctld_available=False,
            slurmdbd_available=False,
            slurmd_available=False,
            slurmrestd_available=False,
        )

        self._elasticsearch = Elasticsearch(self, "elasticsearch")
        self._grafana = GrafanaSource(self, "grafana-source")
        self._influxdb = InfluxDB(self, "influxdb-api")
        self._nhc = Nhc(self, "nhc")
        self._slurmrestd = Slurmrestd(self, "slurmrestd")
        self._slurm_manager = SlurmManager(self, "slurmd")
        self._slurmctld = Slurmctld(self, "slurmctld")
        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmd = Slurmd(self, "slurmd")
        self._prolog_epilog = PrologEpilog(self, "prolog-epilog")

        # #### Charm lifecycle events #### #
        event_handler_bindings = {
            # #### Juju lifecycle events #### #
            self.on.install: self._on_install,

            # self.on.start:
            # self._on_check_status_and_write_config,

            self.on.config_changed:
            self._on_check_status_and_write_config,

            self.on.upgrade_charm: self._on_upgrade,

            # ######## Addons lifecycle events ######## #
            self._elasticsearch.on.elasticsearch_available:
            self._on_check_status_and_write_config,

            self._elasticsearch.on.elasticsearch_unavailable:
            self._on_check_status_and_write_config,

            self._grafana.on.grafana_available:
            self._on_grafana_available,

            self._influxdb.on.influxdb_available:
            self._on_influxdb_available,

            self._influxdb.on.influxdb_unavailable:
            self._on_check_status_and_write_config,

            self._nhc.on.nhc_bin_available:
            self._on_check_status_and_write_config,

            # ######## Slurm component lifecycle events ######## #
            self._slurmctld.on.slurmctld_available:
            self._on_check_status_and_write_config,

            self._slurmctld.on.slurmctld_unavailable:
            self._on_check_status_and_write_config,

            self._slurmdbd.on.slurmdbd_available:
            self._on_check_status_and_write_config,

            self._slurmdbd.on.slurmdbd_unavailable:
            self._on_check_status_and_write_config,

            self._slurmd.on.slurmd_available:
            self._on_check_status_and_write_config,

            self._slurmd.on.slurmd_unavailable:
            self._on_check_status_and_write_config,

            self._slurmrestd.on.slurmrestd_available:
            self._on_check_status_and_write_config,

            self._slurmrestd.on.slurmrestd_unavailable:
            self._on_check_status_and_write_config,

            self._prolog_epilog.on.prolog_epilog_available:
            self._on_check_status_and_write_config,

            self._prolog_epilog.on.prolog_epilog_unavailable:
            self._on_check_status_and_write_config,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install the slurm snap and set the munge key."""
        self._slurm_manager.install()
        self._stored.munge_key = self._slurm_manager.get_munge_key()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Upgrade the charm.

        The upgrade needs a complete slurm_config; when it cannot be
        assembled yet the unit is blocked and the event is deferred.
        """
        slurm_config = self._assemble_slurm_config()
        if not slurm_config:
            self.unit.status = BlockedStatus(
                "Cannot generate slurm_config, defering upgrade.")
            event.defer()
            return
        self._slurm_manager.upgrade(slurm_config)

    def _on_grafana_available(self, event):
        """Create the grafana-source if we are the leader and have influxdb."""
        leader = self._is_leader()
        influxdb_info = self._get_influxdb_info()
        grafana = self._grafana

        if leader and influxdb_info:
            grafana.set_grafana_source_info(influxdb_info)

    def _on_influxdb_available(self, event):
        """Create the grafana-source if we have all the things."""
        grafana = self._grafana
        influxdb_info = self._get_influxdb_info()
        leader = self._is_leader()

        if leader and grafana.is_joined and influxdb_info:
            grafana.set_grafana_source_info(influxdb_info)

        # influxdb info is part of the addons section of the slurm config,
        # so the config is re-evaluated whenever influxdb becomes available.
        self._on_check_status_and_write_config(event)

    def _on_check_status_and_write_config(self, event):
        """Check that we have what we need before we proceed."""
        if not self._check_status():
            event.defer()
            return

        # Generate the slurm_config
        slurm_config = self._assemble_slurm_config()

        if not slurm_config:
            self.unit.status = BlockedStatus(
                "Cannot generate slurm_config - defering event.")
            event.defer()
            return

        self._slurmctld.set_slurm_config_on_app_relation_data(slurm_config)
        self._slurmd.set_slurm_config_on_app_relation_data(slurm_config)

        if self._stored.slurmrestd_available:
            self._slurmrestd.set_slurm_config_on_app_relation_data(
                slurm_config)

        self._slurm_manager.render_config_and_restart({
            **slurm_config,
            'munge_key': self.get_munge_key(),
        })

    def _assemble_slurm_config(self):
        """Assemble and return the slurm config.

        Returns an empty dict when slurmctld, slurmdbd or slurmd info is
        not available yet. Later entries win on key collisions, so charm
        config overrides values coming from the relations.
        """
        slurmctld_info = self._slurmctld.get_slurmctld_info()
        slurmdbd_info = self._slurmdbd.get_slurmdbd_info()
        slurmd_info = self._slurmd.get_slurmd_info()

        if not (slurmd_info and slurmctld_info and slurmdbd_info):
            return {}

        addons_info = self._assemble_addons()
        partitions_info = self._assemble_partitions(slurmd_info)

        logger.debug(addons_info)
        logger.debug(partitions_info)
        logger.debug(slurmctld_info)
        logger.debug(slurmdbd_info)

        return {
            'munge_key': self._stored.munge_key,
            'partitions': partitions_info,
            **slurmctld_info,
            **slurmdbd_info,
            **addons_info,
            **self.model.config,
        }

    def _assemble_partitions(self, slurmd_info):
        """Return a copy of the partitions with the default one flagged.

        The default partition is the one named by the ``default_partition``
        charm config when that is set; otherwise it is the partition name
        held in stored state. The matching partition gets
        ``partition_default = 'YES'``. The input ``slurmd_info`` is never
        modified; every partition in the result is a deep copy.
        """
        # The default partition name does not change while iterating, so
        # resolve it once up front. A configured name takes precedence
        # over the stored one.
        default_partition_name = (
            self.model.config.get('default_partition')
            or self._stored.default_partition
        )

        partitions = []
        for partition in slurmd_info:
            partition_tmp = copy.deepcopy(partition)
            if partition['partition_name'] == default_partition_name:
                partition_tmp['partition_default'] = 'YES'
            partitions.append(partition_tmp)
        return partitions

    def _assemble_addons(self):
        """Assemble any addon components.

        Returns a dict holding only the addons that currently provide
        data: ``prolog_epilog``, ``acct_gather``, ``nhc`` and
        ``elasticsearch_address``.
        """
        acct_gather = self._get_influxdb_info()
        elasticsearch_ingress = \
            self._elasticsearch.get_elasticsearch_ingress()
        nhc_info = self._nhc.get_nhc_info()
        prolog_epilog = self._prolog_epilog.get_prolog_epilog()

        ctxt = dict()

        if prolog_epilog:
            ctxt['prolog_epilog'] = prolog_epilog

        if acct_gather:
            # Work on a copy so that adding the 'custom' key below does not
            # modify the object handed out by the influxdb interface.
            ctxt['acct_gather'] = dict(acct_gather)
            acct_gather_custom = self.model.config.get('acct_gather_custom')
            if acct_gather_custom:
                ctxt['acct_gather']['custom'] = acct_gather_custom

        if nhc_info:
            ctxt['nhc'] = {
                'nhc_bin': nhc_info['nhc_bin'],
                'health_check_interval': nhc_info['health_check_interval'],
                'health_check_node_state':
                nhc_info['health_check_node_state'],
            }

        if elasticsearch_ingress:
            ctxt['elasticsearch_address'] = elasticsearch_ingress

        return ctxt

    def _check_status(self):
        """Check that the core components we need exist.

        Sets the unit status as a side effect: blocked with the first
        missing dependency, or active with an empty message when
        everything is present. Returns True only in the latter case.
        """
        slurmctld_available = self._stored.slurmctld_available
        slurmdbd_available = self._stored.slurmdbd_available
        slurmd_available = self._stored.slurmd_available
        slurm_installed = self._stored.slurm_installed
        default_partition = self._stored.default_partition

        deps = [
            default_partition,
            slurmctld_available,
            slurmdbd_available,
            slurmd_available,
            slurm_installed,
        ]

        if not all(deps):
            if not slurmctld_available:
                self.unit.status = BlockedStatus("NEED RELATION TO SLURMCTLD")
            elif not slurmdbd_available:
                self.unit.status = BlockedStatus("NEED RELATION TO SLURMDBD")
            elif not slurmd_available:
                self.unit.status = BlockedStatus("NEED RELATION TO SLURMD")
            elif not slurm_installed:
                self.unit.status = BlockedStatus("SLURM NOT INSTALLED")
            else:
                # Only default_partition is left as the missing dependency.
                self.unit.status = BlockedStatus("PARTITION NAME UNAVAILABLE")
            return False

        self.unit.status = ActiveStatus("")
        return True

    def _get_influxdb_info(self):
        """Return influxdb info."""
        return self._influxdb.get_influxdb_info()

    def _is_leader(self):
        """Return whether this unit is the leader."""
        return self.model.unit.is_leader()

    def get_munge_key(self):
        """Return the munge key from stored state."""
        return self._stored.munge_key

    def get_default_partition(self):
        """Return self._stored.default_partition."""
        return self._stored.default_partition

    def is_slurm_installed(self):
        """Return true/false based on whether or not slurm is installed."""
        return self._stored.slurm_installed

    def set_slurmctld_available(self, slurmctld_available):
        """Set slurmctld_available."""
        self._stored.slurmctld_available = slurmctld_available

    def set_slurmdbd_available(self, slurmdbd_available):
        """Set slurmdbd_available."""
        self._stored.slurmdbd_available = slurmdbd_available

    def set_default_partition(self, partition_name):
        """Set self._stored.default_partition."""
        self._stored.default_partition = partition_name

    def set_slurmd_available(self, slurmd_available):
        """Set slurmd_available."""
        self._stored.slurmd_available = slurmd_available

    def set_slurmrestd_available(self, slurmrestd_available):
        """Set slurmrestd_available."""
        self._stored.slurmrestd_available = slurmrestd_available
class SlurmdbdCharm(CharmBase):
    """Slurmdbd Charm Class.

    Writes the slurmdbd configuration and restarts the service once the
    database info, the munge key and the slurm installation are all
    present in stored state.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Set the defaults for slurmdbd."""
        super().__init__(*args)

        self._stored.set_default(db_info=dict())
        self._stored.set_default(munge_key=str())
        self._stored.set_default(slurm_installed=False)

        self._slurm_manager = SlurmManager(self, "slurmdbd")

        self._slurmdbd = SlurmdbdProvidesRelation(self, "slurmdbd")

        self._db = MySQLClient(self, "db")

        event_handler_bindings = {
            self.on.install: self._on_install,

            self.on.config_changed:
            self._write_config_and_restart_slurmdbd,

            self._db.on.database_available:
            self._write_config_and_restart_slurmdbd,

            self._slurmdbd.on.munge_key_available:
            self._write_config_and_restart_slurmdbd,

            self._slurmdbd.on.slurmctld_unavailable:
            self._on_slurmctld_unavailable,

            self.on.upgrade_charm: self._on_upgrade,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Handle upgrade charm event.

        Installs the snap from the ``slurm`` resource directly with the
        ``snap`` command and logs an error when that command fails.
        """
        logger.debug('_on_upgrade(): entering')
        # self._slurm_manager.upgrade()
        resource_path = str(self.model.resources.fetch('slurm'))
        return_code = subprocess.call([
            "snap",
            "install",
            resource_path,
            "--dangerous",
            "--classic",
        ])
        # subprocess.call() does not raise on failure; surface a non-zero
        # exit status in the log instead of dropping it silently.
        if return_code != 0:
            logger.error(
                "_on_upgrade(): snap install of %s exited with code %s",
                resource_path,
                return_code,
            )

    def _on_slurmctld_unavailable(self, event):
        """Block the unit when slurmctld goes away."""
        self.unit.status = BlockedStatus("Need relation to slurmctld.")

    def _check_status(self) -> bool:
        """Check that we have the things we need.

        Returns True when db_info, munge_key and slurm_installed are all
        set. Otherwise sets a blocked status naming the first missing
        prerequisite and returns False.
        """
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed

        if db_info and slurm_installed and munge_key:
            return True

        if not db_info:
            self.unit.status = BlockedStatus("Need relation to MySQL.")
        elif not munge_key:
            self.unit.status = BlockedStatus("Need relation to slurmctld.")
        else:
            # Only slurm_installed can be the missing prerequisite here.
            self.unit.status = BlockedStatus("Slurm not installed.")
        return False

    def _write_config_and_restart_slurmdbd(self, event):
        """Check for prereqs before writing config/restart of slurmdbd."""
        if not self._check_status():
            event.defer()
            return

        slurmdbd_host_port_addr = {
            # Short hostname: everything before the first dot.
            'slurmdbd_hostname': socket.gethostname().split(".")[0],
            'slurmdbd_port': "6819",
        }

        # Later entries win on key collisions: charm config overrides the
        # host/port values, and db_info overrides charm config.
        slurmdbd_config = {
            'munge_key': self._stored.munge_key,
            **slurmdbd_host_port_addr,
            **self.model.config,
            **self._stored.db_info,
        }

        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        self._slurmdbd.set_slurmdbd_available_on_unit_relation_data()
        self.unit.status = ActiveStatus("Slurmdbd Available")

    def set_munge_key(self, munge_key):
        """Set the munge key in the stored state."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Set the db_info in the stored state."""
        self._stored.db_info = db_info
class SlurmdCharm(CharmBase):
    """Slurmd lifecycle events.

    The leader unit assembles the partition (name, state, config and node
    inventory) and publishes it on the slurmd relation. Every unit renders
    the slurm config it receives over that relation.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(
            user_node_state=str(),
            partition_name=str(),
            config_available=False,
            # Read by _check_status(); declare a default so the read
            # cannot fail before the install hook has set it.
            slurm_installed=False,
        )

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmd")

        self._slurmd = Slurmd(self, "slurmd")
        self._slurmd_peer = SlurmdPeer(self, "slurmd-peer")

        event_handler_bindings = {
            self.on.install: self._on_install,

            self.on.upgrade_charm: self._on_upgrade,

            # _on_config_changed refreshes the partition name and then
            # calls _on_send_slurmd_info itself.
            self.on.config_changed: self._on_config_changed,

            self._slurmd_peer.on.slurmd_peer_available:
            self._on_send_slurmd_info,

            self._slurmd.on.slurm_config_available:
            self._on_check_status_and_write_config,

            self.on.set_node_state_action:
            self._on_set_node_state_action,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_config_changed(self, event):
        """Sync the partition name with config, then resend slurmd info."""
        self.get_set_return_partition_name()
        self._on_send_slurmd_info(event)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Delegate the charm upgrade to the slurm manager."""
        self._slurm_manager.upgrade()

    def _on_set_node_state_action(self, event):
        """Set the node state."""
        self._stored.user_node_state = event.params["node-state"]
        # NOTE(review): _on_send_slurmd_info may call event.defer();
        # confirm that this is acceptable for action events.
        self._on_send_slurmd_info(event)

    def _on_send_slurmd_info(self, event):
        """Publish the partition on the slurmd relation (leader only).

        The leader defers the event when the slurmd relation is not
        joined or no partition could be assembled.
        """
        # NOTE(review): only the leader defers here; non-leader units
        # ignore the event. Confirm this matches the intended nesting.
        if self.framework.model.unit.is_leader():
            if self._slurmd.is_joined:
                partition = self._assemble_partition()
                if partition:
                    self._slurmd.set_slurmd_info_on_app_relation_data(
                        partition)
                    return
            event.defer()
            return

    def _on_check_status_and_write_config(self, event):
        """Render the slurm config once the prerequisites are present."""
        if not self._check_status():
            event.defer()
            return
        # Cast the relation-provided config to a plain dict.
        slurm_config = dict(self._slurmd.get_slurm_config())
        self._slurm_manager.render_config_and_restart(slurm_config)
        self.unit.status = ActiveStatus("Slurmd Available")

    def _check_status(self):
        """Return True when slurm is installed and a config is available.

        Sets a blocked status and returns False otherwise.
        """
        slurm_installed = self._stored.slurm_installed
        config_available = self._stored.config_available

        if not (slurm_installed and config_available):
            self.unit.status = BlockedStatus(
                "NEED RELATION TO SLURM CONFIGURATOR")
            return False
        return True

    def _assemble_partition(self):
        """Assemble the partition info."""
        partition_name = self._stored.partition_name
        partition_config = self.model.config.get('partition-config')
        partition_state = self.model.config.get('partition-state')

        slurmd_info = self._assemble_slurmd_info()

        return {
            'inventory': slurmd_info,
            'partition_name': partition_name,
            'partition_state': partition_state,
            'partition_config': partition_config,
        }

    def _assemble_slurmd_info(self):
        """Apply mutations to nodes in the partition, return slurmd nodes.

        Returns None when the peer relation has no slurmd info. When the
        user has set node states (``hostname=state`` pairs separated by
        commas), the returned data is a modified deep copy; otherwise the
        peer data is returned as-is.
        """
        slurmd_info = self._slurmd_peer.get_slurmd_info()
        if not slurmd_info:
            return None

        # If the user has set custom state for nodes
        # ensure we update the state for the targeted nodes.
        user_node_state = self._stored.user_node_state
        if user_node_state:
            node_states = {
                item.split("=")[0]: item.split("=")[1]
                for item in user_node_state.split(",")
            }

            # Copy the slurmd_info returned from the slurmd-peer relation
            # to a temporary variable to which we will make modifications.
            slurmd_info_tmp = copy.deepcopy(slurmd_info)

            # Iterate over the slurmd nodes in the partition and check
            # for nodes that need their state modified.
            for partition in slurmd_info:
                partition_tmp = copy.deepcopy(partition)
                for slurmd_node in partition['inventory']:
                    if slurmd_node['hostname'] in node_states:
                        slurmd_node_tmp = copy.deepcopy(slurmd_node)
                        slurmd_node_tmp['state'] = \
                            node_states[slurmd_node['hostname']]
                        # A modified node is re-appended, so it moves to
                        # the end of this partition's inventory.
                        partition_tmp['inventory'].remove(slurmd_node)
                        partition_tmp['inventory'].append(slurmd_node_tmp)
                slurmd_info_tmp.remove(partition)
                slurmd_info_tmp.append(partition_tmp)
        else:
            slurmd_info_tmp = slurmd_info

        return slurmd_info_tmp

    def get_set_return_partition_name(self):
        """Set the partition name."""
        # Determine if a partition-name config exists, if so
        # ensure the partition_name known by the charm is consistent.
        # If no partition name has been specified then generate one.
        partition_name = self.model.config.get('partition-name')
        if partition_name:
            if partition_name != self._stored.partition_name:
                self._stored.partition_name = partition_name
        elif not self._stored.partition_name:
            self._stored.partition_name = f"juju-compute-{random_string()}"
        return self._stored.partition_name

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def get_hostname(self):
        """Return the hostname."""
        return self._slurm_manager.hostname

    def get_port(self):
        """Return the port."""
        return self._slurm_manager.port
class SlurmctldCharm(CharmBase):
    """Slurmctld lifecycle events.

    The leader publishes slurmctld info from the peer relation on the
    slurmctld relation. Every unit renders the slurm config received over
    that relation together with the munge key held in stored state.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(
            munge_key=str(),
            slurmctld_controller_type=str(),
            # Read by _check_status(); declare a default so the read
            # cannot fail before the install hook has set it.
            slurm_installed=False,
        )

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmctld")

        self._slurmctld = Slurmctld(self, "slurmctld")
        self._slurmctld_peer = SlurmctldPeer(self, "slurmctld-peer")

        # NOTE(review): _on_upgrade is defined below but upgrade_charm is
        # not observed here — confirm whether that is intentional.
        event_handler_bindings = {
            self.on.install: self._on_install,

            self._slurmctld.on.slurm_config_available:
            self._on_check_status_and_write_config,

            self._slurmctld_peer.on.slurmctld_peer_available:
            self._on_slurmctld_peer_available,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Delegate the charm upgrade to the slurm manager."""
        self._slurm_manager.upgrade()

    def _on_slurmctld_peer_available(self, event):
        """Publish slurmctld info on the slurmctld relation (leader only).

        The leader defers the event when the slurmctld relation is not
        joined or the peer relation has no slurmctld info yet.
        """
        # NOTE(review): only the leader defers here; non-leader units
        # ignore the event. Confirm this matches the intended nesting.
        if self.framework.model.unit.is_leader():
            if self._slurmctld.is_joined:
                slurmctld_info = self._slurmctld_peer.get_slurmctld_info()
                if slurmctld_info:
                    self._slurmctld.set_slurmctld_info_on_app_relation_data(
                        slurmctld_info)
                    return
            event.defer()
            return

    def _on_check_status_and_write_config(self, event):
        """Render the slurm config once the prerequisites are present."""
        if not self._check_status():
            event.defer()
            return

        slurm_config = self._slurmctld.get_slurm_config_from_relation()
        if not slurm_config:
            event.defer()
            return

        munge_key = self._stored.munge_key
        if not munge_key:
            event.defer()
            return

        self._slurm_manager.render_config_and_restart({
            **slurm_config,
            'munge_key': munge_key,
        })
        self.unit.status = ActiveStatus("Slurmctld Available")

    def _check_status(self):
        """Return True when munge key, installation and config all exist.

        Otherwise sets a blocked status naming the first missing
        prerequisite and returns False.
        """
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed
        slurm_config = self._slurmctld.get_slurm_config_from_relation()

        if not (munge_key and slurm_installed and slurm_config):
            if not munge_key:
                self.unit.status = BlockedStatus(
                    "NEED RELATION TO SLURM CONFIGURATOR")
            elif not slurm_config:
                self.unit.status = BlockedStatus("WAITING ON SLURM CONFIG")
            else:
                self.unit.status = BlockedStatus("SLURM NOT INSTALLED")
            return False
        return True

    def set_munge_key(self, munge_key):
        """Set the munge_key in _stored state."""
        self._stored.munge_key = munge_key

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def get_hostname(self):
        """Return the hostname."""
        return self._slurm_manager.hostname

    def get_port(self):
        """Return the port."""
        return self._slurm_manager.port
class SlurmdCharm(CharmBase):
    """Operator charm that drives the slurmd lifecycle.

    The slurm config is kept in stored state (written through
    :meth:`set_slurm_config`) and rendered once slurm is installed and
    the config has been flagged as available.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Build helpers, set stored-state defaults and observe events."""
        super().__init__(*args)

        self.config = self.model.config
        self.slurm_manager = SlurmManager(self, 'slurmd')
        self.slurmd = SlurmdProvides(self, "slurmd")

        self._stored.set_default(
            slurm_installed=False,
            slurm_config_available=False,
            slurm_config=dict(),
        )

        # (event source, handler) pairs, observed in this order.
        observed = (
            (self.on.install, self._on_install),
            (self.on.config_changed, self._on_config_changed),
            (self.on.upgrade_charm, self._on_upgrade),
            (
                self.slurmd.on.slurmctld_available,
                self._on_render_config_and_restart,
            ),
            (
                self.slurmd.on.slurmctld_unavailable,
                self._on_render_config_and_restart,
            ),
        )
        for source, callback in observed:
            self.framework.observe(source, callback)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self.slurm_manager.install()
        self.unit.status = ActiveStatus("Slurm Installed")
        self._stored.slurm_installed = True

    def _on_upgrade(self, event):
        """Upgrade through the slurm manager using the stored config."""
        stored_config = dict(self._stored.slurm_config)
        self.slurm_manager.upgrade(stored_config, resource=False)

    def _on_config_changed(self, event):
        """Push the config onto the application relation data."""
        self.slurmd.force_set_config_on_app_relation_data()

    def _on_render_config_and_restart(self, event):
        """Write the stored slurm config, or block and defer if not ready."""
        installed = self._stored.slurm_installed
        config_ready = self._stored.slurm_config_available

        if not (installed and config_ready):
            self.unit.status = BlockedStatus(
                "Blocked need relation to slurmctld."
            )
            event.defer()
            return

        # StoredState hands back a wrapper; convert it to a plain dict.
        plain_config = dict(self._stored.slurm_config)
        self.slurm_manager.render_config_and_restart(plain_config)
        self.unit.status = ActiveStatus("Slurmd Available")

    def is_slurm_installed(self):
        """Return the slurm_installed flag from stored state."""
        return self._stored.slurm_installed

    def set_slurm_config_available(self, config_available):
        """Store whether a slurm config is available."""
        self._stored.slurm_config_available = config_available

    def set_slurm_config(self, slurm_config):
        """Store the slurm config."""
        self._stored.slurm_config = slurm_config
class SlurmdbdCharm(CharmBase):
    """Slurmdbd charm.

    Renders the slurmdbd configuration from the munge key, charm config,
    peer-provided slurmdbd info and database info, and (on the leader)
    publishes the slurmdbd info on the slurmdbd relation.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Set stored-state defaults, build helpers and observe events."""
        super().__init__(*args)

        self._stored.set_default(munge_key=str())
        self._stored.set_default(db_info=dict())
        self._stored.set_default(slurm_installed=False)

        self._nrpe = Nrpe(self, "nrpe-external-master")
        self._slurm_manager = SlurmManager(self, "slurmdbd")
        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmdbd_peer = SlurmdbdPeer(self, "slurmdbd-peer")
        self._db = MySQLClient(self, "db")

        # (event source, handler) pairs, observed in this order.
        observed = (
            (self.on.install, self._on_install),
            (
                self.on.config_changed,
                self._write_config_and_restart_slurmdbd,
            ),
            (
                self._db.on.database_available,
                self._write_config_and_restart_slurmdbd,
            ),
            (
                self._slurmdbd_peer.on.slurmdbd_peer_available,
                self._write_config_and_restart_slurmdbd,
            ),
            (
                self._slurmdbd.on.slurmdbd_available,
                self._write_config_and_restart_slurmdbd,
            ),
            (
                self._slurmdbd.on.slurmdbd_unavailable,
                self._on_slurmdbd_unavailable,
            ),
        )
        for source, callback in observed:
            self.framework.observe(source, callback)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Delegate the charm upgrade to the slurm manager."""
        self._slurm_manager.upgrade()

    def _on_leader_elected(self, event):
        """Forward the event to the peer relation's changed handler."""
        self._slurmdbd_peer._on_relation_changed(event)

    def _on_slurmdbd_unavailable(self, event):
        """Re-evaluate the unit status."""
        self._check_status()

    def _check_status(self) -> bool:
        """Return True when every prerequisite is present.

        A blocked status is set only when db_info or munge_key is the
        missing piece; other missing prerequisites leave the status
        untouched.
        """
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        if all((slurmdbd_info, db_info, slurm_installed, munge_key)):
            return True

        if not db_info:
            self.unit.status = BlockedStatus("Need relation to MySQL.")
        elif not munge_key:
            self.unit.status = BlockedStatus(
                "Need relation to slurm-configurator.")
        return False

    def _write_config_and_restart_slurmdbd(self, event):
        """Render the slurmdbd config and restart, or defer if not ready."""
        if not self._check_status():
            event.defer()
            return

        db_info = self._stored.db_info
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        # Layer the sources; each update overrides keys set before it.
        slurmdbd_config = {'munge_key': self._stored.munge_key}
        slurmdbd_config.update(self.model.config)
        slurmdbd_config.update(slurmdbd_info)
        slurmdbd_config.update(db_info)

        if self.model.unit.is_leader():
            self._slurmdbd.set_slurmdbd_info_on_app_relation_data(
                slurmdbd_info)

        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        self.unit.status = ActiveStatus("Slurmdbd Available")

    def get_port(self):
        """Return the port reported by the slurm manager."""
        return self._slurm_manager.port

    def get_hostname(self):
        """Return the hostname reported by the slurm manager."""
        return self._slurm_manager.hostname

    def get_slurm_component(self):
        """Return the slurm component reported by the slurm manager."""
        return self._slurm_manager.slurm_component

    def set_munge_key(self, munge_key):
        """Store the munge key."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Store the database info."""
        self._stored.db_info = db_info
class SlurmdbdCharm(CharmBase):
    """Slurmdbd charm.

    Renders the slurmdbd configuration from the munge key, charm config,
    peer-provided slurmdbd info and database info. After restarting, the
    leader publishes the slurmdbd info, tagged with a fresh unique id, on
    the slurmdbd relation.
    """

    _stored = StoredState()

    def __init__(self, *args):
        """Set stored-state defaults, build helpers and observe events."""
        super().__init__(*args)

        self._stored.set_default(munge_key=str())
        self._stored.set_default(db_info=dict())
        self._stored.set_default(slurm_installed=False)

        self._nrpe = Nrpe(self, "nrpe-external-master")
        self._slurm_manager = SlurmManager(self, "slurmdbd")
        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmdbd_peer = SlurmdbdPeer(self, "slurmdbd-peer")
        self._db = MySQLClient(self, "db")

        write_config = self._write_config_and_restart_slurmdbd
        # (event source, handler) pairs, observed in this order.
        observed = (
            (self.on.install, self._on_install),
            (self.on.config_changed, write_config),
            (self._db.on.database_available, write_config),
            (self._slurmdbd_peer.on.slurmdbd_peer_available, write_config),
            (self._slurmdbd.on.slurmdbd_available, write_config),
            (
                self._slurmdbd.on.slurmdbd_unavailable,
                self._on_slurmdbd_unavailable,
            ),
        )
        for source, callback in observed:
            self.framework.observe(source, callback)

    def _on_install(self, event):
        """Install slurm and record that the installation happened."""
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Delegate the charm upgrade to the slurm manager."""
        self._slurm_manager.upgrade()

    def _on_leader_elected(self, event):
        """Forward the event to the peer relation's changed handler."""
        self._slurmdbd_peer._on_relation_changed(event)

    def _on_slurmdbd_unavailable(self, event):
        """Re-evaluate the unit status."""
        self._check_status()

    def _check_status(self) -> bool:
        """Return True when every prerequisite is present.

        A blocked status is set only when db_info or munge_key is the
        missing piece; other missing prerequisites leave the status
        untouched.
        """
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        prerequisites = (slurmdbd_info, db_info, slurm_installed, munge_key)
        if all(prerequisites):
            return True

        if not db_info:
            self.unit.status = BlockedStatus("Need relation to MySQL.")
        elif not munge_key:
            self.unit.status = BlockedStatus(
                "Need relation to slurm-configurator.")
        return False

    def _write_config_and_restart_slurmdbd(self, event):
        """Render the slurmdbd config and restart, or defer if not ready."""
        # Defer the event until _check_status() reports that all
        # pre-conditions are met.
        if not self._check_status():
            event.defer()
            return

        db_info = self._stored.db_info
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        # Layer the sources; each update overrides keys set before it.
        slurmdbd_config = {'munge_key': self._stored.munge_key}
        slurmdbd_config.update(self.model.config)
        slurmdbd_config.update(slurmdbd_info)
        slurmdbd_config.update(db_info)

        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        logger.debug("rendering config and restarting")

        # Only the leader may set relation data on the application, so no
        # other unit attempts it.
        if self.model.unit.is_leader():
            # Juju, and subsequently the operator framework, do not emit
            # relation-changed events when the data on the other side of
            # the relation has not actually changed. Setting identical
            # data repeatedly therefore has no effect.
            #
            # This handler may run several times once _check_status()
            # returns True (the deferred invocations all fire), so the
            # same slurmdbd_info may be set more than once while slurmdbd
            # is being reconfigured and restarted and the rest of the
            # stack is still coming up.
            #
            # slurm-configurator has to see a relation-changed event every
            # time data is set, not only when it differs, so that it
            # re-emits its chain of observed events and all services are
            # reconfigured *and* restarted *after* slurmdbd.
            #
            # 'slurmdbd_info_id' exists only to make the application
            # relation data unique on every write so that the
            # relation-changed event is observed on the other side.
            relation_payload = {'slurmdbd_info_id': str(uuid.uuid4())}
            relation_payload.update(slurmdbd_info)
            self._slurmdbd.set_slurmdbd_info_on_app_relation_data(
                relation_payload)

        self.unit.status = ActiveStatus("Slurmdbd Available")

    def get_port(self):
        """Return the port reported by the slurm manager."""
        return self._slurm_manager.port

    def get_hostname(self):
        """Return the hostname reported by the slurm manager."""
        return self._slurm_manager.hostname

    def get_slurm_component(self):
        """Return the slurm component reported by the slurm manager."""
        return self._slurm_manager.slurm_component

    def set_munge_key(self, munge_key):
        """Store the munge key."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Store the database info."""
        self._stored.db_info = db_info