Beispiel #1
0
    def exists_group(self, name: str) -> Result:
        """
        Check whether a SLURM account with the given name is listed by sacctmgr

        Parameters
        ----------
        name : str
            Account name to look up

        Returns
        ---------
        Result
            Result.status is 0 when the listing command produced output, 1 when it returned None
            Result.response is True when the listing contains exactly one line, False otherwise
        """
        self._logger.info("exists_account {0}".format(name))

        query = self._sacctmgr + " -n list account name=" + name
        output = self.__run_cmd2__(query)

        # No output object at all means the command itself could not be evaluated
        if output is None:
            return Result(1, False)

        # Exactly one listed line is taken as "the account exists"
        found = len(output.splitlines()) == 1
        return Result(0, found)
Beispiel #2
0
    def add_user(self, user_atts: dict) -> Result:
        """
        Create a SLURM user unless one with the same name is already present. An existing user is
        left untouched (not overwritten).

        Parameters
        ----------
        user_atts: dict
            Attributes used to construct the SlurmUser that is created

        Returns
        ---------
        Result
            Result.status is 0 when the user already exists or the create command reports status 0,
            1 when the existence check failed, otherwise the status of the create command
            Result.response is an informational message for the early exits, None otherwise

        See Also
        --------
        update_user
        """
        self._logger.info("add_user {0}".format(user_atts))
        user = SlurmUser(user_atts)

        # Bail out early when the lookup fails or the user is already there
        lookup = self.exists_user(user.name())
        if lookup.is_error():
            return Result(1, "Error testing user exists")
        if lookup.response is True:
            return Result(0, "User already exists")

        # The user is new: issue the create command
        create_args = " -i create user {0}".format(user)
        exit_status = self.__run_cmd__(self._sacctmgr + create_args)
        return Result(exit_status, None)
Beispiel #3
0
    def get_members(self, name: str) -> Result:
        """
        Retrieve the user names of all users that are associated with the account

        Parameters
        ----------
        name : str
            SLURM account name

        Returns
        ---------
        Result
            Result.status is 0 for success, 1 when the listing command returned no output object
            Result.response is a list of user names associated with the SLURM account. If the operation fails, an
            empty list is returned
        """
        self._logger.info("get_members {0}".format(name))

        cmd = self._sacctmgr + " -P list account WithAssoc name='" + name + "' format='User'"
        stdout = self.__run_cmd2__(cmd)

        if stdout is None:
            self._logger.error("Command %s requested STDOUT but returned None",
                               cmd)
            return Result(1, [])

        # The first output line is skipped (presumably the column header printed by -P);
        # empty lines are dropped. Compare by value: `is not ''` tests object identity and
        # only worked by accident of string interning.
        members = [line for line in stdout.splitlines()[1:] if line != '']

        self._logger.debug(members)
        return Result(0, members)
Beispiel #4
0
    def exists_user(self, name: str) -> Result:
        """
        Check whether a SLURM user with the given name is listed by sacctmgr

        Parameters
        ----------
        name : str
            SLURM user name to look up

        Returns
        ---------
        Result
            Result.status is 0 when the listing command produced output, 1 when it returned None
            Result.response is True when the listing contains exactly one line, False for any other
            line count, and the string "Command Error" when the command returned None
        """
        self._logger.info("exists_user {0}".format(name))

        output = self.__run_cmd2__(self._sacctmgr + " -n list user name=" + name)

        # The command gave us nothing to inspect
        if output is None:
            return Result(1, "Command Error")

        # Anything other than exactly one listed line counts as "not found"
        if len(output.splitlines()) != 1:
            self._logger.debug("User %s does NOT exist", name)
            return Result(0, False)

        self._logger.debug("User %s does exist", name)
        return Result(0, True)
Beispiel #5
0
    def list_groups(self):
        """
        Retrieve the names of all SLURM accounts in the database

        Returns
        ---------
        Result
            Result.status is 0 for success, 1 when the listing command returned no output object
            Result.response is a list of account names (first '|'-delimited field of each row);
            empty on failure
        """
        self._logger.info("list_groups")

        cmd = self._sacctmgr + " -P list account"
        stdout = self.__run_cmd2__(cmd)

        if stdout is None:
            self._logger.error("Command %s requested STDOUT but returned None",
                               cmd)
            return Result(1, [])

        # Skip the first line (presumably the column header printed by -P) and any empty
        # lines. Compare by value: `is not ''` tests object identity and only worked by
        # accident of string interning.
        accounts = [
            line.split("|")[0] for line in stdout.splitlines()[1:]
            if line != ''
        ]

        self._logger.debug(accounts)
        return Result(0, accounts)
Beispiel #6
0
    def delete_user(self, name: str) -> Result:
        """
        Remove a user from SLURM, unless deletes are blocked or the user is not present

        Parameters
        ----------
        name : str
            SLURM user name to delete

        Returns
        ---------
        Result
            Result.status is 1 when deletes are blocked or the existence check failed, 0 when the
            user does not exist, otherwise the status of the delete command
            Result.response is an informational message for the early exits, None otherwise
        """
        self._logger.info("delete_user {0}".format(name))

        # Deletes can be switched off globally for this manager
        if self._block_delete:
            return Result(
                1, "Delete operations on users and groups are disabled")

        lookup = self.exists_user(name)
        if lookup.is_error():
            return Result(1, "Error testing user exists")
        if lookup.response is False:
            return Result(0, "User does not exist")

        delete_cmd = self._sacctmgr + " -i delete user name=" + name
        return Result(self.__run_cmd__(delete_cmd), None)
Beispiel #7
0
    def get_group(self, name: str) -> Result:
        """
        Retrieve the current configuration of an account

        Parameters
        ----------
        name : str
            SLURM account name

        Returns
        ---------
        Result
            Result.status is 0 for success, 1 on error (the command returned no output, the output
            had no data rows, or no row with an empty User column was found)
            Result.response is a dictionary mapping the column names of the listing to the values of
            the matching row, plus a "memberUid" list of member user names. If the operation fails,
            an empty dictionary is returned.
        """
        self._logger.info("get_account {0}".format(name))

        # A throw-away SlurmAccount built from the schema is only used to obtain the
        # list of columns to request from sacctmgr
        group_atts = {"name": name}
        group_atts.update(self._schema)
        template = SlurmAccount(group_atts, self._schema)

        cmd = (self._sacctmgr + " -P list account WithAssoc name='" + name +
               "' format='Account,User," + template.format_string() + "'")
        stdout = self.__run_cmd2__(cmd)
        account = {}

        if stdout is None:
            self._logger.error("Command %s requested STDOUT but returned None",
                               cmd)
            # A failed command is an error, not an empty success
            return Result(1, account)

        lines = stdout.splitlines()
        if len(lines) < 2:
            return Result(1, account)

        header_tokens = lines[0].split('|')
        for line in lines[1:]:
            data_tokens = line.split('|')
            # The second column is User (see format string); the first row where it is
            # empty is used (presumably the account-level association -- verify).
            # Rows too short to have a second column are ignored instead of raising.
            if len(data_tokens) > 1 and data_tokens[1] == "":
                account = dict(zip(header_tokens, data_tokens))
                break

        if not account:
            return Result(1, account)

        # Add group members to returned object; stays empty if the member lookup fails
        account["memberUid"] = []
        member_result = self.get_members(name)
        if member_result.is_success():
            account["memberUid"] = member_result.response

        self._logger.debug(account)
        return Result(0, account)
Beispiel #8
0
    def send(self, obj: object) -> Result:
        """
        Publish a message to server

        The object is serialized as YAML and published to ``self.queue`` on the default
        exchange. Publishing is tried up to 9 times; after a failed try the method sleeps
        ``(attempt - 1) * 5`` seconds and reconnects if the connection is closed.

        Parameters
        ----------
        obj: dict
            The message to send

        Returns
        ----------
        Result
            Result.status is 0 for success, 453 if the message could not be serialized, 470 if it
            could not be published
            Result.response is None on success, and informational message on error
        """
        # Encode the request dict as YAML
        try:
            message = yaml.safe_dump(obj)
        except yaml.YAMLError as exc:
            self._logger.error("YAML serialization error: %s", exc)
            self._logger.error("{0}".format(obj))
            return Result(453, "Could not serialize the message as YAML")

        self._logger.debug("Sending %s:%s", self.queue, message.rstrip())

        # Send message to server, retrying on any failure
        not_sent = True
        attempts = 1
        while not_sent and attempts < 10:
            try:
                self.channel.basic_publish(exchange='',
                                           routing_key=self.queue,
                                           body=message,
                                           properties=pika.BasicProperties(
                                               delivery_mode=2,
                                               # Indicates message should be persisted on disk
                                           ),
                                           mandatory=True)
                not_sent = False
            except Exception as exc:
                # Deliberately broad: any publish failure triggers a retry. Record the
                # cause so failures are not swallowed silently.
                self._logger.error("Failed to send on attempt %d: %s", attempts, exc)
                time.sleep((attempts - 1) * 5)
                if self.connection.is_closed:
                    self._logger.error("Failed to send on attempt %d because connection closed. Reconnecting...", attempts)
                    self.connect()

            attempts = attempts + 1

        if not_sent:
            self._logger.error("Failed to deliver message %s:%s", self.queue, message.rstrip())
            return Result(470, "Message could not be delivered")

        self._logger.debug("Success")
        return Result(0, None)
Beispiel #9
0
    def error(self, action: str, params: object, result: Result) -> bool:
        """
        Insert an ERROR entry describing a failed operation

        Parameters
        ----------
        action : str
            A unique identifier for the action, usually ${method_name}:${class name}
        params : object
            The arguments to the method as scalar, list or dict; stored as YAML
        result: metaroot.api.Result
            The Result of the failed operation; its status and its transport format (as YAML) are
            stored with the entry

        Returns
        ---------
        bool
            Whatever the underlying insert returns (annotated as bool; True for success)

        Raises
        ---------
        Exception
            If an underlying operation raises an exception
        """
        # Column order: timestamp, entry type, action, parameters, status, result payload
        entry = (
            datetime.datetime.now(),
            ActivityStream.ERROR,
            action,
            yaml.safe_dump(params),
            result.status,
            yaml.safe_dump(result.to_transport_format()),
        )
        return self._insert(entry)
Beispiel #10
0
    def disassociate_users_from_group(self, user_names: list,
                                      group_name: str) -> Result:
        """
        Revoke the membership of several users in one account, one user at a time

        Parameters
        ----------
        user_names : list
            SLURM user names
        group_name : str
            SLURM account name

        Returns
        ---------
        Result
            Result.status is the sum of the statuses of the per-user operations (0 if all succeeded)
            Result.response is the list of user names whose per-user operation returned a truthy
            response (i.e. users that were moved to the "bench" default account)
        """
        self._logger.info("disassociate_users_from_account {0}, {1}".format(
            user_names, group_name))

        # Run the single-user operation for every user, in the order given
        outcomes = [(member,
                     self.disassociate_user_from_group(member, group_name))
                    for member in user_names]

        combined_status = sum(outcome.status for _, outcome in outcomes)
        benched_users = [
            member for member, outcome in outcomes if outcome.response
        ]
        return Result(combined_status, benched_users)
Beispiel #11
0
    def associate_user_to_group(self, user_name: str,
                                group_name: str) -> Result:
        """
        Grant a user membership in an account

        Parameters
        ----------
        user_name : str
            SLURM user name
        group_name : str
            SLURM account name; may be written as "<cluster>:<account>" to restrict the association
            to one cluster

        Returns
        ---------
        Result
            Result.status is the status of the add command (0 for success, >0 on error)
        """
        self._logger.info("associate_user_to_account {0} {1}".format(
            user_name, group_name))

        # Peel off an optional "<cluster>:" prefix; only the first two fields are used
        cluster_clause = ""
        if ":" in group_name:
            cluster_name, group_name = group_name.split(":")[:2]
            cluster_clause = " Cluster='" + cluster_name + "'"

        add_cmd = (self._sacctmgr + " -i add user name='" + user_name +
                   "' account='" + group_name + "'" + cluster_clause)
        return Result(self.__run_cmd__(add_cmd), None)
Beispiel #12
0
    def occur_in_response_to(clazz: str, action: str, payload: object,
                             result: Result, n_priors: int) -> int:
        """
        Reaction hook: e-mail a summary of the operation when its result is an error

        Parameters
        ----------
        clazz: str
            The class name that implemented the method handling the action
        action: str
            The name of the method implemented by clazz that was called
        payload: object
            The arguments that were passed to the method
        result: Result
            The result of the method call
        n_priors: int
            Number of reactions that have already occurred during the requested operation. Not used
            by this implementation.

        Returns
        ---------
        int
            Always 0
        """
        global config

        # Successful results need no reaction
        if not result.is_error():
            return 0

        recipient = config.get("REACTION_NOTIFY")

        # One table row per reported field
        fields = (
            ("Class", clazz),
            ("Action", action),
            ("Payload", str(payload)),
            ("Result Status", str(result.status)),
            ("Result Payload", str(result.response)),
        )
        body = "<table>"
        for label, value in fields:
            body = body + "<tr><td>" + label + "</td><td>" + value + "</td></tr>"
        body = body + "</table>"

        send_email(recipient, "metaroot operation failed", body)
        return 0
Beispiel #13
0
    def set_user_default_group(self, user_name: str,
                               group_name: str) -> Result:
        """
        Make an account the default account of a user

        Parameters
        ----------
        user_name : str
            SLURM user name
        group_name : str
            SLURM account name; a "<cluster>:<account>" value adds a Cluster clause to the command

        Returns
        ---------
        Result
            Result.status is the status of the modify command (0 for success, >0 on error)
        """
        self._logger.info("set_user_default_account {0}, {1}".format(
            user_name, group_name))

        # Peel off an optional "<cluster>:" prefix; only the first two fields are used
        cluster_clause = ""
        if ":" in group_name:
            cluster_name, group_name = group_name.split(":")[:2]
            cluster_clause = " Cluster='" + cluster_name + "'"

        modify_cmd = (self._sacctmgr + " -i modify user where name=" +
                      user_name + " " + cluster_clause +
                      " set defaultaccount=" + group_name)
        return Result(self.__run_cmd__(modify_cmd), None)
Beispiel #14
0
    def record(self, action: str, params: object, result: Result) -> bool:
        """
        Log an operation: as an info entry when the result is a success, as an error entry otherwise

        Parameters
        ----------
        action : str
            A unique identifier for the action, usually ${method_name}:${class name}
        params : object
            The arguments to the method as scalar, list or dict
        result: metaroot.api.Result
            The Result of the operation

        Returns
        ---------
        bool
            The return value of the info() or error() call that was made (True for success)

        Raises
        ---------
        Exception
            If an underlying operation raises an exception
        """
        # Failures carry the Result along so its details are stored too
        if not result.is_success():
            return self.error(action, params, result)
        return self.info(action, params)
Beispiel #15
0
 def echo(self, message: str) -> Result:
     """
     Consume the next message of the expected "hello <n>" sequence.

     Raises an Exception when the message is not "hello <sequence>"; otherwise
     advances the module-level sequence counter and returns a success Result.
     """
     global sequence
     expected = "hello {0}".format(sequence)
     if message == expected:
         sequence += 1
         return Result(0, None)
     raise Exception("Expecting 'hello {0}' but consumed '{1}'".format(
         sequence, message))
Beispiel #16
0
    def add_group(self, group_atts: dict) -> Result:
        """
        Create a SLURM account and then apply its attributes with a modify command

        Parameters
        ----------
        group_atts : dict
            Attributes used to construct the SlurmAccount that is created

        Returns
        ---------
        Result
            Result.status is 0 when the account already exists or both commands report status 0,
            1 when the existence check failed, otherwise the status of the first failing command
            Result.response is an informational message for the early exits, None otherwise
        """
        self._logger.info("add_account {0}".format(group_atts))
        account = SlurmAccount(group_atts, self._schema)

        lookup = self.exists_group(account.name())
        if lookup.is_error():
            return Result(1, "Error checking for existence of group")
        if lookup.response is True:
            return Result(0, "Group already exists")

        # Step 1: create the bare account
        create_cmd = self._sacctmgr + " -i -Q add account name={0}".format(
            account.name())

        # Restrict to one cluster when the account names one; without a cluster clause
        # associations are created for all clusters
        if account.cluster() is not None:
            create_cmd += " cluster={0}".format(account.cluster())

        create_status = self.__run_cmd__(create_cmd)
        if create_status > 0:
            return Result(create_status, None)

        # Step 2: apply the remaining attributes to the new account
        modify_cmd = self._sacctmgr + " -i modify account {0}".format(
            account.as_update_str())
        return Result(self.__run_cmd__(modify_cmd), None)
Beispiel #17
0
    def list_users(self, with_default_group: str):
        """
        Retrieve the names of all SLURM users in the database

        Parameters
        ----------
        with_default_group: str
            Either the string "any" meaning any group, or a string id of a group that will restrict the result to only
            users with the specified group set as their default

        Returns
        ---------
        Result
            Result.status is 0 for success, 1 when the listing command returned no output object
            Result.response is a list of user names (first '|'-delimited field of each row);
            empty on failure
        """
        self._logger.info("list_users")

        if with_default_group == "any":
            cmd = self._sacctmgr + " -P list user"
        else:
            cmd = self._sacctmgr + " -P list user where defaultaccount=\"" + with_default_group + "\""
        stdout = self.__run_cmd2__(cmd)

        if stdout is None:
            self._logger.error("Command %s requested STDOUT but returned None",
                               cmd)
            return Result(1, [])

        # Skip the first line (presumably the column header printed by -P) and any empty
        # lines. Compare by value: `is not ''` tests object identity and only worked by
        # accident of string interning.
        users = [
            line.split("|")[0] for line in stdout.splitlines()[1:]
            if line != ''
        ]

        self._logger.debug(users)
        return Result(0, users)
Beispiel #18
0
    def disassociate_user_from_group(self, user_name: str,
                                     group_name: str) -> Result:
        """
        Remove a user's association with an account (revoke membership)

        If the account is the user's current default account, the user is first associated with
        the reserve account 'bench' and that account is made their default.

        Parameters
        ----------
        user_name : str
            SLURM user name
        group_name : str
            SLURM account name

        Returns
        ---------
        Result
            Result.status is the status of the delete command (0 for success, >0 on error)
            Result.response is True if user had their default group set to "bench", False otherwise
        """
        self._logger.info("disassociate_user_from_account {0}, {1}".format(
            user_name, group_name))

        result = self.get_user(user_name)
        # A failed lookup may carry no attribute mapping (e.g. None); treat anything that
        # is not a dict as "user unknown" instead of raising on the membership test.
        # NOTE(review): assumes get_user returns a dict of user attributes -- confirm.
        user_atts = result.response if isinstance(result.response, dict) else {}
        benched = False

        # Check if non-existent user name specified
        if 'default' not in user_atts:
            self._logger.warning(
                "attempt to disassociate user %s that does not exist",
                user_name)

        # If we are trying to remove the primary group affiliation of the user, set their primary affiliation to
        # the special reserve group 'bench', in which case the user will need to select a new default account for
        # themself
        elif user_atts['default'] == group_name:
            # This can fail if the user already has an association with the bench account,
            # so its result is intentionally ignored
            self.associate_user_to_group(user_name, self._cluster + 'bench')

            # Move the user to the bench account
            benched = self.set_user_default_group(user_name, self._cluster +
                                                  'bench').is_success()
            if benched:
                self._logger.warning(
                    "disassociate_user_from_account {0}, {1} -> User was benched"
                    .format(user_name, group_name))

        # Remove the user affiliation (attempted even when the user lookup found nothing)
        cmd = self._sacctmgr + " -i delete user name='" + user_name + "' account='" + group_name + "'"
        status = self.__run_cmd__(cmd)
        return Result(status, benched)
Beispiel #19
0
    def update_group(self, group_atts: dict) -> Result:
        """
        Apply new attribute values to an existing SLURM account

        Parameters
        ----------
        group_atts: dict
            Attributes of the account to update. The name given here must match the name of the
            account in the SLURM database.

        Returns
        ---------
        Result
            Result.status is the status of the modify command (0 for success, >0 on error)
        """
        self._logger.info("update_account {0}".format(group_atts))

        # The SlurmAccount renders its attributes as the argument string for "modify account"
        update_args = SlurmAccount(group_atts, self._schema).as_update_str()
        modify_cmd = self._sacctmgr + " -i modify account {0}".format(update_args)
        return Result(self.__run_cmd__(modify_cmd), None)
Beispiel #20
0
    def delete_group(self, name: str) -> Result:
        """
        Remove an account from SLURM. All members are disassociated from the account first, which
        also moves users whose default account it is to another default.

        Parameters
        ----------
        name : str
            SLURM account name

        Returns
        ---------
        Result
            Result.status is 0 when the account does not exist or was deleted, 1 when deletes are
            blocked or a lookup failed, 2 when removing the members failed, otherwise the status of
            the delete command
        """
        self._logger.info("delete_account {0}".format(name))

        # Deletes can be switched off globally for this manager
        if self._block_delete:
            return Result(
                1, "Delete operations on users and groups are disabled")

        # A missing account is reported as success with an informational message
        lookup = self.exists_group(name)
        if lookup.is_error():
            return Result(1, None)
        if lookup.response is False:
            return Result(0, "Group does not exist")

        members = self.get_members(name)
        if members.is_error():
            return Result(1, None)

        # Members must be detached (and their default accounts changed where needed)
        # before the account itself is removed
        detached = self.disassociate_users_from_group(members.response, name)
        if detached.is_error():
            return Result(2, None)

        delete_cmd = self._sacctmgr + " -i delete account name=" + name
        return Result(self.__run_cmd__(delete_cmd), None)
Beispiel #21
0
    def update_user(self, user_atts: dict) -> Result:
        """
        Apply new attribute values to an existing SLURM user. The attributes must include an
        "Account" entry, as this method only changes user parameters associated with an account.

        Parameters
        ----------
        user_atts : dict
            Attributes of the user to update. The name given here must match the name of the user
            in the SLURM database.

        Returns
        ---------
        Result
            Result.status is the status of the modify command (0 for success, >0 on error)
        """
        self._logger.info("update_user {0}".format(user_atts))

        # The SlurmUser renders its attributes as the argument string for "modify user"
        update_args = SlurmUser(user_atts).as_update_str()
        modify_cmd = self._sacctmgr + " -i modify user {0}".format(update_args)
        return Result(self.__run_cmd__(modify_cmd), None)
Beispiel #22
0
 def exists_group(self, name: str):
     """Return a success Result tagged "exists_group:<self.name>"; the ``name`` argument is not used."""
     tag = "exists_group:" + self.name
     return Result(0, tag)
Beispiel #23
0
 def add_group(self, group_atts: dict) -> Result:
     """Return a success Result that echoes the given attributes back as its response."""
     echoed = Result(0, group_atts)
     return echoed
Beispiel #24
0
    def send(self, obj: object) -> Result:
        """
        Method to initiate an RPC request

        The request is serialized as YAML, published to ``self.queue`` with a fresh correlation id
        and ``self.callback_queue`` as reply queue, and the method then waits (up to 35 polls of
        5 seconds each) until ``self.response`` has been set.

        Parameters
        ----------
        obj: object
            A dictionary specifying a remote method name and arguments to invoke

        Returns
        ----------
        Result
            Result.status is 0 for success, >0 on error (453 request could not be serialized,
            470 request could not be published, 471 no response in time, 454 response could not be
            parsed)
            Result.response is any object returned by the remote method invocation or None
        """
        # Encode the request dict as YAML
        try:
            message = yaml.safe_dump(obj)
        except yaml.YAMLError as exc:
            self.logger.error("YAML serialization error: %s", exc)
            self.logger.error("{0}".format(obj))
            return Result(453, None)

        self.response = None
        self.corr_id = str(uuid.uuid4())

        # Send RPC request to server, retrying on any failure
        not_sent = True
        attempts = 1
        while not_sent and attempts < 10:
            try:
                self.channel.basic_publish(exchange='',
                                           routing_key=self.queue,
                                           body=message,
                                           properties=pika.BasicProperties(
                                               reply_to=self.callback_queue,
                                               correlation_id=self.corr_id))
                not_sent = False
            except Exception as exc:
                # Deliberately broad: any publish failure triggers a retry. Log the real
                # cause; the connection is only re-established if it is actually closed.
                self.logger.info("Failed to send on attempt %d: %s", attempts, exc)
                time.sleep((attempts-1)*5)
                if self.connection.is_closed:
                    self.logger.info("Connection closed on attempt %d. Reconnecting...", attempts)
                    self.connect()

            attempts = attempts + 1
        if not_sent:
            self.logger.error("Failed to deliver message %s:%s", self.queue, message.rstrip())
            send_email(self.config.get("NOTIFY_ON_ERROR"),
                       "Message delivery failure: " + self.__class__.__name__,
                       "Failed to deliver message {0}:{1}".format(self.queue, message.rstrip()))
            return Result(470, "Message could not be delivered")

        # Wait for response; self.response is presumably set by a consumer callback that
        # runs while connection events are processed
        attempts = 1
        while self.response is None and attempts < 36:
            self.logger.debug("Waiting for callback response to %s", str(obj))
            self.connection.process_data_events(time_limit=5)
            attempts = attempts + 1
        self.corr_id = None

        # Timed out only if there is still no response. Testing the attempt counter
        # instead would discard a response that arrived during the final poll.
        if self.response is None:
            self.logger.error("Operation timed out waiting for a response to %s:%s", self.queue, message.rstrip())
            send_email(self.config.get("NOTIFY_ON_ERROR"),
                       "RPC timeout failure: " + self.__class__.__name__,
                       "No response received for message {0}:{1}".format(self.queue, message.rstrip()))
            return Result(471, "Operation timed out waiting for a response")

        # Decode the response dict from YAML
        try:
            res_obj = yaml.safe_load(self.response)
            return Result.from_transport_format(res_obj)
        except yaml.YAMLError as exc:
            self.logger.error("YAML deserialization error: %s", exc)
            self.logger.error("{0}".format(obj))
            return Result(454, None)
Beispiel #25
0
 def delete_user(self, name: str):
     """Return a success Result tagged "delete_user:<self.name>"; the ``name`` argument is not used."""
     tag = "delete_user:" + self.name
     return Result(0, tag)
Beispiel #26
0
 def get_user(self, name: str):
     """Return a success Result tagged "get_user:<self.name>"; the ``name`` argument is not used."""
     tag = "get_user:" + self.name
     return Result(0, tag)
Beispiel #27
0
 def update_user(self, user_atts: dict):
     """Return a success Result tagged "update_user:<self.name>"; ``user_atts`` is not used."""
     tag = "update_user:" + self.name
     return Result(0, tag)
Beispiel #28
0
 def add_user(self, user_atts: dict):
     """Return a success Result tagged "add_user:<self.name>"; ``user_atts`` is not used."""
     tag = "add_user:" + self.name
     return Result(0, tag)
Beispiel #29
0
 def delete_group(self, name: str):
     """Return a success Result tagged "delete_group:<self.name>"; the ``name`` argument is not used."""
     tag = "delete_group:" + self.name
     return Result(0, tag)
Beispiel #30
0
 def update_group(self, group_atts: dict):
     """Return a success Result tagged "update_group:<self.name>"; ``group_atts`` is not used."""
     tag = "update_group:" + self.name
     return Result(0, tag)