예제 #1
0
class SlurmLoginCharm(CharmBase):
    """Operator charm responsible for lifecycle operations for slurmctld."""

    _stored = StoredState()

    def __init__(self, *args):
        """Initialize charm and configure states and events to observe."""
        super().__init__(*args)
        self._stored.set_default(
            slurm_installed=False,
            config_available=False,
        )
        self.slurm_manager = SlurmManager(self, "slurmrestd")
        self._slurmrestd = SlurmrestdRequires(self, 'slurmrestd')

        event_handler_bindings = {
            self.on.install:
            self._on_install,
            self.on.upgrade_charm:
            self._on_upgrade,
            self._slurmrestd.on.config_available:
            self._on_check_status_and_write_config,
            self._slurmrestd.on.config_unavailable:
            self._on_check_status_and_write_config,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        self.slurm_manager.install()
        self.unit.status = ActiveStatus("slurm installed")
        self._stored.slurm_installed = True

    def _on_upgrade(self, event):
        """Upgrade charm event handler."""
        self.slurm_manager.upgrade()

    def _on_check_status_and_write_config(self, event):
        slurm_installed = self._stored.slurm_installed
        slurm_config = self._stored.config_available
        logger.debug("##### inside check status and write config ######")
        if not (slurm_installed and slurm_config):
            if not slurm_config:
                self.unit.status = BlockedStatus(
                    "NEED RELATION TO SLURM-CONFIGURATOR")
            else:
                self.unit.status = BlockedStatus("SLURM NOT INSTALLED")
            event.defer()
            return
        else:
            logger.debug("##### STATUS CONFIRMED ######")
            config = dict(self._slurmrestd.get_slurm_config())
            logger.debug(config)
            self.slurm_manager.render_config_and_restart(config)
            self.unit.status = ActiveStatus("Slurmrestd Available")

    def set_config_available(self, boolean):
        """Set self._stored.slurmctld_available."""
        self._stored.config_available = boolean
예제 #2
0
class SlurmConfiguratorCharm(CharmBase):
    """Facilitate slurm configuration operations."""

    _stored = StoredState()

    def __init__(self, *args):
        """Init charm, _stored defaults, interfaces and observe events."""
        super().__init__(*args)

        self._stored.set_default(
            default_partition=str(),
            munge_key=str(),
            slurm_installed=False,
            slurmctld_available=False,
            slurmdbd_available=False,
            slurmd_available=False,
            slurmrestd_available=False,
        )

        self._elasticsearch = Elasticsearch(self, "elasticsearch")
        self._grafana = GrafanaSource(self, "grafana-source")
        self._influxdb = InfluxDB(self, "influxdb-api")
        self._nhc = Nhc(self, "nhc")
        self._slurmrestd = Slurmrestd(self, "slurmrestd")
        self._slurm_manager = SlurmManager(self, "slurmd")
        self._slurmctld = Slurmctld(self, "slurmctld")
        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmd = Slurmd(self, "slurmd")
        self._prolog_epilog = PrologEpilog(self, "prolog-epilog")

        # #### Charm lifecycle events #### #
        event_handler_bindings = {
            # #### Juju lifecycle events #### #
            self.on.install:
            self._on_install,

            # self.on.start:
            # self._on_check_status_and_write_config,
            self.on.config_changed:
            self._on_check_status_and_write_config,
            self.on.upgrade_charm:
            self._on_upgrade,

            # ######## Addons lifecycle events ######## #
            self._elasticsearch.on.elasticsearch_available:
            self._on_check_status_and_write_config,
            self._elasticsearch.on.elasticsearch_unavailable:
            self._on_check_status_and_write_config,
            self._grafana.on.grafana_available:
            self._on_grafana_available,
            self._influxdb.on.influxdb_available:
            self._on_influxdb_available,
            self._influxdb.on.influxdb_unavailable:
            self._on_check_status_and_write_config,
            self._nhc.on.nhc_bin_available:
            self._on_check_status_and_write_config,

            # ######## Slurm component lifecycle events ######## #
            self._slurmctld.on.slurmctld_available:
            self._on_check_status_and_write_config,
            self._slurmctld.on.slurmctld_unavailable:
            self._on_check_status_and_write_config,
            self._slurmdbd.on.slurmdbd_available:
            self._on_check_status_and_write_config,
            self._slurmdbd.on.slurmdbd_unavailable:
            self._on_check_status_and_write_config,
            self._slurmd.on.slurmd_available:
            self._on_check_status_and_write_config,
            self._slurmd.on.slurmd_unavailable:
            self._on_check_status_and_write_config,
            self._slurmrestd.on.slurmrestd_available:
            self._on_check_status_and_write_config,
            self._slurmrestd.on.slurmrestd_unavailable:
            self._on_check_status_and_write_config,
            self._prolog_epilog.on.prolog_epilog_available:
            self._on_check_status_and_write_config,
            self._prolog_epilog.on.prolog_epilog_unavailable:
            self._on_check_status_and_write_config,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install the slurm snap and set the munge key."""
        self._slurm_manager.install()
        self._stored.munge_key = self._slurm_manager.get_munge_key()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Upgrade the charm."""
        slurm_config = self._assemble_slurm_config()

        if not slurm_config:
            self.unit.status = BlockedStatus(
                "Cannot generate slurm_config, defering upgrade.")
            event.defer()
            return

        self._slurm_manager.upgrade(slurm_config)

    def _on_grafana_available(self, event):
        """Create the grafana-source if we are the leader and have influxdb."""
        leader = self._is_leader()
        influxdb_info = self._get_influxdb_info()
        grafana = self._grafana

        if leader and influxdb_info:
            grafana.set_grafana_source_info(influxdb_info)

    def _on_influxdb_available(self, event):
        """Create the grafana-source if we have all the things."""
        grafana = self._grafana
        influxdb_info = self._get_influxdb_info()
        leader = self._is_leader()

        if leader and grafana.is_joined and influxdb_info:
            grafana.set_grafana_source_info(influxdb_info)

        self._on_check_status_and_write_config(event)

    def _on_check_status_and_write_config(self, event):
        """Check that we have what we need before we proceed."""
        if not self._check_status():
            event.defer()
            return

        # Generate the slurm_config
        slurm_config = self._assemble_slurm_config()

        if not slurm_config:
            self.unit.status = BlockedStatus(
                "Cannot generate slurm_config - defering event.")
            event.defer()
            return

        self._slurmctld.set_slurm_config_on_app_relation_data(slurm_config, )
        self._slurmd.set_slurm_config_on_app_relation_data(slurm_config, )
        if self._stored.slurmrestd_available:
            self._slurmrestd.set_slurm_config_on_app_relation_data(
                slurm_config, )
        self._slurm_manager.render_config_and_restart({
            **slurm_config, 'munge_key':
            self.get_munge_key()
        })

    def _assemble_slurm_config(self):
        """Assemble and return the slurm config."""
        slurmctld_info = self._slurmctld.get_slurmctld_info()
        slurmdbd_info = self._slurmdbd.get_slurmdbd_info()
        slurmd_info = self._slurmd.get_slurmd_info()

        if not (slurmd_info and slurmctld_info and slurmdbd_info):
            return {}

        addons_info = self._assemble_addons()
        partitions_info = self._assemble_partitions(slurmd_info)

        logger.debug(addons_info)
        logger.debug(partitions_info)
        logger.debug(slurmctld_info)
        logger.debug(slurmdbd_info)

        return {
            'munge_key': self._stored.munge_key,
            'partitions': partitions_info,
            **slurmctld_info,
            **slurmdbd_info,
            **addons_info,
            **self.model.config,
        }

    def _assemble_partitions(self, slurmd_info):
        """Make any needed modifications to partition data."""
        slurmd_info_tmp = copy.deepcopy(slurmd_info)

        for partition in slurmd_info:

            # Deep copy the partition to a tmp var so we can modify it as
            # needed whilst not modifying the object we are iterating over.
            partition_tmp = copy.deepcopy(partition)
            # Extract the partition_name from the partition and from the charm
            # config.
            partition_name = partition['partition_name']
            default_partition_from_config = self.model.config.get(
                'default_partition')

            # Check that the default_partition isn't defined in the charm
            # config.
            # If the user hasn't provided a default partition, then we infer
            # the partition_default by defaulting to the first related slurmd
            # application.
            if not default_partition_from_config:
                if partition['partition_name'] ==\
                   self._stored.default_partition:
                    partition_tmp['partition_default'] = 'YES'
            else:
                if default_partition_from_config == partition_name:
                    partition_tmp['partition_default'] = 'YES'

            slurmd_info_tmp.remove(partition)
            slurmd_info_tmp.append(partition_tmp)

        return slurmd_info_tmp

    def _assemble_addons(self):
        """Assemble any addon components."""
        acct_gather = self._get_influxdb_info()
        elasticsearch_ingress = \
            self._elasticsearch.get_elasticsearch_ingress()
        nhc_info = self._nhc.get_nhc_info()
        prolog_epilog = self._prolog_epilog.get_prolog_epilog()

        ctxt = dict()

        if prolog_epilog:
            ctxt['prolog_epilog'] = prolog_epilog

        if acct_gather:
            ctxt['acct_gather'] = acct_gather
            acct_gather_custom = self.model.config.get('acct_gather_custom')
            if acct_gather_custom:
                ctxt['acct_gather']['custom'] = acct_gather_custom

        if nhc_info:
            ctxt['nhc'] = {
                'nhc_bin': nhc_info['nhc_bin'],
                'health_check_interval': nhc_info['health_check_interval'],
                'health_check_node_state': nhc_info['health_check_node_state'],
            }

        if elasticsearch_ingress:
            ctxt['elasticsearch_address'] = elasticsearch_ingress

        return ctxt

    def _check_status(self):
        """Check that the core components we need exist."""
        slurmctld_available = self._stored.slurmctld_available
        slurmdbd_available = self._stored.slurmdbd_available
        slurmd_available = self._stored.slurmd_available
        slurm_installed = self._stored.slurm_installed
        default_partition = self._stored.default_partition

        deps = [
            default_partition,
            slurmctld_available,
            slurmdbd_available,
            slurmd_available,
            slurm_installed,
        ]

        if not all(deps):
            if not slurmctld_available:
                self.unit.status = BlockedStatus("NEED RELATION TO SLURMCTLD")
            elif not slurmdbd_available:
                self.unit.status = BlockedStatus("NEED RELATION TO SLURMDBD")
            elif not slurmd_available:
                self.unit.status = BlockedStatus("NEED RELATION TO SLURMD")
            elif not slurm_installed:
                self.unit.status = BlockedStatus("SLURM NOT INSTALLED")
            else:
                self.unit.status = BlockedStatus("PARTITION NAME UNAVAILABLE")
            return False
        else:
            self.unit.status = ActiveStatus("")
            return True

    def _get_influxdb_info(self):
        """Return influxdb info."""
        return self._influxdb.get_influxdb_info()

    def _is_leader(self):
        return self.model.unit.is_leader()

    def get_munge_key(self):
        """Return the slurmdbd_info from stored state."""
        return self._stored.munge_key

    def get_default_partition(self):
        """Return self._stored.default_partition."""
        return self._stored.default_partition

    def is_slurm_installed(self):
        """Return true/false based on whether or not slurm is installed."""
        return self._stored.slurm_installed

    def set_slurmctld_available(self, slurmctld_available):
        """Set slurmctld_available."""
        self._stored.slurmctld_available = slurmctld_available

    def set_slurmdbd_available(self, slurmdbd_available):
        """Set slurmdbd_available."""
        self._stored.slurmdbd_available = slurmdbd_available

    def set_default_partition(self, partition_name):
        """Set self._stored.default_partition."""
        self._stored.default_partition = partition_name

    def set_slurmd_available(self, slurmd_available):
        """Set slurmd_available."""
        self._stored.slurmd_available = slurmd_available

    def set_slurmrestd_available(self, slurmrestd_available):
        """Set slurmrestd_available."""
        self._stored.slurmrestd_available = slurmrestd_available
예제 #3
0
class SlurmdbdCharm(CharmBase):
    """Slurmdbd Charm Class."""

    _stored = StoredState()

    def __init__(self, *args):
        """Set the defaults for slurmdbd."""
        super().__init__(*args)

        self._stored.set_default(db_info=dict())
        self._stored.set_default(munge_key=str())
        self._stored.set_default(slurm_installed=False)

        self._slurm_manager = SlurmManager(self, "slurmdbd")
        self._slurmdbd = SlurmdbdProvidesRelation(self, "slurmdbd")

        self._db = MySQLClient(self, "db")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.config_changed: self._write_config_and_restart_slurmdbd,
            self._db.on.database_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.munge_key_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.slurmctld_unavailable:
            self._on_slurmctld_unavailable,
            self.on.upgrade_charm: self._on_upgrade,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Handle upgrade charm event."""
        logger.debug('_on_upgrade(): entering')
        #self._slurm_manager.upgrade()
        resource_path = str(self.model.resources.fetch('slurm'))

        subprocess.call([
            "snap",
            "install",
            resource_path,
            "--dangerous",
            "--classic",
        ])

    def _on_slurmctld_unavailable(self, event):
        self.unit.status = BlockedStatus("Need relation to slurmctld.")

    def _check_status(self) -> bool:
        """Check that we have the things we need."""
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed

        if not (db_info and slurm_installed and munge_key):
            if not self._stored.db_info:
                self.unit.status = BlockedStatus("Need relation to MySQL.")
            elif not self._stored.munge_key:
                self.unit.status = BlockedStatus("Need relation to slurmctld.")
            return False
        return True

    def _write_config_and_restart_slurmdbd(self, event):
        """Check for prereqs before writing config/restart of slurmdbd."""
        if not self._check_status():
            event.defer()
            return

        slurmdbd_host_port_addr = {
            'slurmdbd_hostname': socket.gethostname().split(".")[0],
            'slurmdbd_port': "6819",
        }
        slurmdbd_config = {
            'munge_key': self._stored.munge_key,
            **slurmdbd_host_port_addr,
            **self.model.config,
            **self._stored.db_info,
        }
        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        self._slurmdbd.set_slurmdbd_available_on_unit_relation_data()
        self.unit.status = ActiveStatus("Slurmdbd Available")

    def set_munge_key(self, munge_key):
        """Set the munge key in the stored state."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Set the db_info in the stored state."""
        self._stored.db_info = db_info
예제 #4
0
파일: charm.py 프로젝트: ducks23/rev2
class SlurmdCharm(CharmBase):
    """Slurmd lifecycle events."""

    _stored = StoredState()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(
            user_node_state=str(),
            partition_name=str(),
            config_available=False,
        )

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmd")

        self._slurmd = Slurmd(self, "slurmd")
        self._slurmd_peer = SlurmdPeer(self, "slurmd-peer")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.upgrade_charm: self._on_upgrade,
            self.on.config_changed: self._on_send_slurmd_info,
            self._slurmd_peer.on.slurmd_peer_available:
            self._on_send_slurmd_info,
            self._slurmd.on.slurm_config_available:
            self._on_check_status_and_write_config,
            self.on.set_node_state_action: self._on_set_node_state_action,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_config_changed(self, event):
        self.get_set_return_partition_name()
        self._on_send_slurmd_info(event)

    def _on_install(self, event):
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        self._slurm_manager.upgrade()

    def _on_set_node_state_action(self, event):
        """Set the node state."""
        self._stored.user_node_state = event.params["node-state"]
        self._on_send_slurm_info(event)

    def _on_send_slurmd_info(self, event):
        if self.framework.model.unit.is_leader():
            if self._slurmd.is_joined:
                partition = self._assemble_partition()
                if partition:
                    self._slurmd.set_slurmd_info_on_app_relation_data(
                        partition)
                    return
            event.defer()
            return

    def _on_check_status_and_write_config(self, event):
        if not self._check_status():
            event.defer()
            return
        slurm_config = dict(self._slurmd.get_slurm_config())
        self._slurm_manager.render_config_and_restart(slurm_config)
        self.unit.status = ActiveStatus("Slurmd Available")

    def _check_status(self):
        slurm_installed = self._stored.slurm_installed
        config_available = self._stored.config_available

        if not (slurm_installed and config_available):
            self.unit.status = BlockedStatus(
                "NEED RELATION TO SLURM CONFIGURATOR")
            return False
        else:
            return True

    def _assemble_partition(self):
        """Assemble the partition info."""
        partition_name = self._stored.partition_name
        partition_config = self.model.config.get('partition-config')
        partition_state = self.model.config.get('partition-state')

        slurmd_info = self._assemble_slurmd_info()

        return {
            'inventory': slurmd_info,
            'partition_name': partition_name,
            'partition_state': partition_state,
            'partition_config': partition_config,
        }

    def _assemble_slurmd_info(self):
        """Apply mutations to nodes in the partition, return slurmd nodes."""
        slurmd_info = self._slurmd_peer.get_slurmd_info()
        if not slurmd_info:
            return None

        # If the user has set custom state for nodes
        # ensure we update the state for the targeted nodes.
        user_node_state = self._stored.user_node_state
        if user_node_state:
            node_states = {
                item.split("=")[0]: item.split("=")[1]
                for item in user_node_state.split(",")
            }

            # Copy the slurmd_info returned from the the slurmd-peer relation
            # to a temporary variable to which we will make modifications.
            slurmd_info_tmp = copy.deepcopy(slurmd_info)

            # Iterate over the slurmd nodes in the partition and check
            # for nodes that need their state modified.
            for partition in slurmd_info:
                partition_tmp = copy.deepcopy(partition)
                for slurmd_node in partition['inventory']:
                    if slurmd_node['hostname'] in node_states.keys():
                        slurmd_node_tmp = copy.deepcopy(slurmd_node)
                        slurmd_node_tmp['state'] = \
                            node_states[slurmd_node['hostname']]
                        partition_tmp['inventory'].remove(slurmd_node)
                        partition_tmp['inventory'].append(slurmd_node_tmp)
                slurmd_info_tmp.remove(partition)
                slurmd_info_tmp.append(partition_tmp)
        else:
            slurmd_info_tmp = slurmd_info

        return slurmd_info_tmp

    def get_set_return_partition_name(self):
        """Set the partition name."""
        # Determine if a partition-name config exists, if so
        # ensure the partition_name known by the charm is consistent.
        # If no partition name has been specified then generate one.
        partition_name = self.model.config.get('partition-name')
        if partition_name:
            if partition_name != self._stored.partition_name:
                self._stored.partition_name = partition_name
        elif not self._stored.partition_name:
            self._stored.partition_name = f"juju-compute-{random_string()}"
        return self._stored.partition_name

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def get_hostname(self):
        """Return the hostname."""
        return self._slurm_manager.hostname

    def get_port(self):
        """Return the port."""
        return self._slurm_manager.port
예제 #5
0
class SlurmctldCharm(CharmBase):
    """Slurmctld lifecycle events."""

    _stored = StoredState()

    def __init__(self, *args):
        """Init _stored attributes and interfaces, observe events."""
        super().__init__(*args)

        self._stored.set_default(
            munge_key=str(),
            slurmctld_controller_type=str(),
        )

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmctld")

        self._slurmctld = Slurmctld(self, "slurmctld")
        self._slurmctld_peer = SlurmctldPeer(self, "slurmctld-peer")

        event_handler_bindings = {
            self.on.install:
            self._on_install,
            self._slurmctld.on.slurm_config_available:
            self._on_check_status_and_write_config,
            self._slurmctld_peer.on.slurmctld_peer_available:
            self._on_slurmctld_peer_available,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        self._slurm_manager.upgrade()

    def _on_slurmctld_peer_available(self, event):
        if self.framework.model.unit.is_leader():
            if self._slurmctld.is_joined:
                slurmctld_info = self._slurmctld_peer.get_slurmctld_info()
                if slurmctld_info:
                    self._slurmctld.set_slurmctld_info_on_app_relation_data(
                        slurmctld_info)
                    return
            event.defer()
            return

    def _on_check_status_and_write_config(self, event):
        if not self._check_status():
            event.defer()
            return

        slurm_config = self._slurmctld.get_slurm_config_from_relation()
        if not slurm_config:
            event.defer()
            return

        munge_key = self._stored.munge_key
        if not munge_key:
            event.defer()
            return

        self._slurm_manager.render_config_and_restart({
            **slurm_config, 'munge_key':
            munge_key
        })
        self.unit.status = ActiveStatus("Slurmctld Available")

    def _check_status(self):
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed
        slurm_config = self._slurmctld.get_slurm_config_from_relation()

        if not (munge_key and slurm_installed and slurm_config):
            if not munge_key:
                self.unit.status = BlockedStatus(
                    "NEED RELATION TO SLURM CONFIGURATOR")
            elif not slurm_config:
                self.unit.status = BlockedStatus("WAITING ON SLURM CONFIG")
            else:
                self.unit.status = BlockedStatus("SLURM NOT INSTALLED")
            return False
        else:
            return True

    def set_munge_key(self, munge_key):
        """Set the munge_key in _stored state."""
        self._stored.munge_key = munge_key

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def get_hostname(self):
        """Return the hostname."""
        return self._slurm_manager.hostname

    def get_port(self):
        """Return the port."""
        return self._slurm_manager.port
예제 #6
0
class SlurmdCharm(CharmBase):
    """Operator charm responsible for facilitating slurmd lifecycle events."""

    _stored = StoredState()

    def __init__(self, *args):
        """Initialize charm state, and observe charm lifecycle events."""
        super().__init__(*args)

        self.config = self.model.config
        self.slurm_manager = SlurmManager(self, 'slurmd')
        self.slurmd = SlurmdProvides(self, "slurmd")

        self._stored.set_default(
            slurm_installed=False,
            slurm_config_available=False,
            slurm_config=dict(),
        )

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.config_changed: self._on_config_changed,
            self.on.upgrade_charm: self._on_upgrade,
            self.slurmd.on.slurmctld_available:
            self._on_render_config_and_restart,
            self.slurmd.on.slurmctld_unavailable:
            self._on_render_config_and_restart,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        """Install the slurm scheduler as snap or tar file."""
        self.slurm_manager.install()
        self.unit.status = ActiveStatus("Slurm Installed")
        self._stored.slurm_installed = True

    def _on_upgrade(self, event):
        """Upgrade charm event handler."""
        slurm_config = dict(self._stored.slurm_config)
        self.slurm_manager.upgrade(slurm_config, resource=False)

    def _on_config_changed(self, event):
        self.slurmd.force_set_config_on_app_relation_data()

    def _on_render_config_and_restart(self, event):
        """Retrieve slurm_config from controller and write slurm.conf."""
        slurm_installed = self._stored.slurm_installed
        slurm_config_available = self._stored.slurm_config_available

        if (slurm_installed and slurm_config_available):
            # cast StoredState -> python dict
            slurm_config = dict(self._stored.slurm_config)
            self.slurm_manager.render_config_and_restart(slurm_config)
            self.unit.status = ActiveStatus("Slurmd Available")
        else:
            self.unit.status = BlockedStatus(
                "Blocked need relation to slurmctld."
            )
            event.defer()
            return

    def is_slurm_installed(self):
        """Return true/false based on whether or not slurm is installed."""
        return self._stored.slurm_installed

    def set_slurm_config_available(self, config_available):
        """Set slurm_config_available in local stored state."""
        self._stored.slurm_config_available = config_available

    def set_slurm_config(self, slurm_config):
        """Set the slurm_config in local stored state."""
        self._stored.slurm_config = slurm_config
예제 #7
0
class SlurmdbdCharm(CharmBase):
    """Slurmdbd Charm."""

    _stored = StoredState()

    def __init__(self, *args):
        """Set the default class attributes."""
        super().__init__(*args)

        self._stored.set_default(munge_key=str())
        self._stored.set_default(db_info=dict())
        self._stored.set_default(slurm_installed=False)

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmdbd")

        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmdbd_peer = SlurmdbdPeer(self, "slurmdbd-peer")

        self._db = MySQLClient(self, "db")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.config_changed: self._write_config_and_restart_slurmdbd,
            self._db.on.database_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd_peer.on.slurmdbd_peer_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.slurmdbd_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.slurmdbd_unavailable:
            self._on_slurmdbd_unavailable,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Handle upgrade charm event."""
        self._slurm_manager.upgrade()

    def _on_leader_elected(self, event):
        self._slurmdbd_peer._on_relation_changed(event)

    def _on_slurmdbd_unavailable(self, event):
        self._check_status()

    def _check_status(self) -> bool:
        """Check that we have the things we need."""
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        deps = [
            slurmdbd_info,
            db_info,
            slurm_installed,
            munge_key,
        ]

        if not all(deps):
            if not db_info:
                self.unit.status = BlockedStatus("Need relation to MySQL.")
            elif not munge_key:
                self.unit.status = BlockedStatus(
                    "Need relation to slurm-configurator.")
            return False
        return True

    def _write_config_and_restart_slurmdbd(self, event):
        """Check for prereqs before writing config/restart of slurmdbd."""
        if not self._check_status():
            event.defer()
            return

        db_info = self._stored.db_info
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        slurmdbd_config = {
            'munge_key': self._stored.munge_key,
            **self.model.config,
            **slurmdbd_info,
            **db_info,
        }
        if self.model.unit.is_leader():
            self._slurmdbd.set_slurmdbd_info_on_app_relation_data(
                slurmdbd_info)
        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        self.unit.status = ActiveStatus("Slurmdbd Available")

    def get_port(self):
        """Return the port from slurm-ops-manager."""
        return self._slurm_manager.port

    def get_hostname(self):
        """Return the hostname from slurm-ops-manager."""
        return self._slurm_manager.hostname

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def set_munge_key(self, munge_key):
        """Set the munge key in the stored state."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Set the db_info in the stored state."""
        self._stored.db_info = db_info
예제 #8
0
class SlurmdbdCharm(CharmBase):
    """Slurmdbd Charm."""

    _stored = StoredState()

    def __init__(self, *args):
        """Set the default class attributes."""
        super().__init__(*args)

        self._stored.set_default(munge_key=str())
        self._stored.set_default(db_info=dict())
        self._stored.set_default(slurm_installed=False)

        self._nrpe = Nrpe(self, "nrpe-external-master")

        self._slurm_manager = SlurmManager(self, "slurmdbd")

        self._slurmdbd = Slurmdbd(self, "slurmdbd")
        self._slurmdbd_peer = SlurmdbdPeer(self, "slurmdbd-peer")

        self._db = MySQLClient(self, "db")

        event_handler_bindings = {
            self.on.install: self._on_install,
            self.on.config_changed: self._write_config_and_restart_slurmdbd,
            self._db.on.database_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd_peer.on.slurmdbd_peer_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.slurmdbd_available:
            self._write_config_and_restart_slurmdbd,
            self._slurmdbd.on.slurmdbd_unavailable:
            self._on_slurmdbd_unavailable,
        }
        for event, handler in event_handler_bindings.items():
            self.framework.observe(event, handler)

    def _on_install(self, event):
        self._slurm_manager.install()
        self._stored.slurm_installed = True
        self.unit.status = ActiveStatus("Slurm Installed")

    def _on_upgrade(self, event):
        """Handle upgrade charm event."""
        self._slurm_manager.upgrade()

    def _on_leader_elected(self, event):
        self._slurmdbd_peer._on_relation_changed(event)

    def _on_slurmdbd_unavailable(self, event):
        self._check_status()

    def _check_status(self) -> bool:
        """Check that we have the things we need."""
        db_info = self._stored.db_info
        munge_key = self._stored.munge_key
        slurm_installed = self._stored.slurm_installed
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        deps = [
            slurmdbd_info,
            db_info,
            slurm_installed,
            munge_key,
        ]

        if not all(deps):
            if not db_info:
                self.unit.status = BlockedStatus("Need relation to MySQL.")
            elif not munge_key:
                self.unit.status = BlockedStatus(
                    "Need relation to slurm-configurator.")
            return False
        return True

    def _write_config_and_restart_slurmdbd(self, event):
        """Check for prereqs before writing config/restart of slurmdbd."""
        # Ensure all pre-conditions are met with _check_statu(), if not
        # defer the event.
        if not self._check_status():
            event.defer()
            return

        db_info = self._stored.db_info
        slurmdbd_info = self._slurmdbd_peer.get_slurmdbd_info()

        slurmdbd_config = {
            'munge_key': self._stored.munge_key,
            **self.model.config,
            **slurmdbd_info,
            **db_info,
        }

        self._slurm_manager.render_config_and_restart(slurmdbd_config)
        logger.debug("rendering config and restarting")
        # Only the leader can set relation data on the application.
        # Enforce that no one other then the leader trys to set
        # application relation data.
        if self.model.unit.is_leader():
            self._slurmdbd.set_slurmdbd_info_on_app_relation_data({
                # Juju, and subsequently the operator framework do not
                # emit relation-changed events if data hasn't actually
                # changed on the other side of the relation. Even if we set
                # the data multiple times, it doesn't mean anything unless
                # the data being set is different then what already exists
                # in the relation data.
                #
                # We use 'slurmdbd_info_id' to ensure the slurmdbd_info
                # is unique each time it is set on the application relation
                # data. This is needed so that that related applications
                # (namely slurm-configurator) will observe a
                # relation-changed event.
                #
                # This event (_write_config_and_restart_slurmdbd) may be
                # invoked multiple times once _check_status() returns True
                # (aka pre-conditions are met that account for the deffered
                # invocations.)
                # This means that the same slurmdbd_info data may be set on
                # application data multiple times and slurmdbd may be
                # reconfigured and restarted while slurmctld and the rest
                # of the stack are trying to come up and create the clustr.
                #
                # We need slurm-configurator to emit the relation-changed
                # event for the slurmdbd relation every time data is set,
                # not just when data has changed.
                # slurm-configurator need to re-emit its chain
                # of observed events to ensure all services end up getting
                # reconfigured *and* restarted *after* slurmdbd, for each
                # time that slurmdbd gets reconfigured and restarted.
                #
                # For this reason, 'slurmdbd_info_id' only
                # matters in the context of making sure the application
                # relation data actually changes so that relation-changed
                # event is observed on the other side.
                'slurmdbd_info_id':
                str(uuid.uuid4()),
                **slurmdbd_info
            })
        self.unit.status = ActiveStatus("Slurmdbd Available")

    def get_port(self):
        """Return the port from slurm-ops-manager."""
        return self._slurm_manager.port

    def get_hostname(self):
        """Return the hostname from slurm-ops-manager."""
        return self._slurm_manager.hostname

    def get_slurm_component(self):
        """Return the slurm component."""
        return self._slurm_manager.slurm_component

    def set_munge_key(self, munge_key):
        """Set the munge key in the stored state."""
        self._stored.munge_key = munge_key

    def set_db_info(self, db_info):
        """Set the db_info in the stored state."""
        self._stored.db_info = db_info