Beispiel #1
0
    def test_slurm_user_accounts(self):
        """Test that the commands to create, change and remove users are correctly generated."""
        vo_members = {
            "vo1": (set(["user1", "user2", "user3"]), VO(vsc_id="vo1", institute={"site": "gent"})),
            "vo2": (set(["user4", "user5", "user6"]), VO(vsc_id="vo2", institute={"site": "gent"})),
        }

        active_accounts = set(["user1", "user3", "user4", "user5", "user6", "user7"])
        slurm_user_info = [
            SlurmUser(User='******', Def_Acct='vo1', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
            SlurmUser(User='******', Def_Acct='vo1', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
            SlurmUser(User='******', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo2', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
            SlurmUser(User='******', Def_Acct='vo1', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
            SlurmUser(User='******', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo2', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
        ]

        commands = slurm_user_accounts(vo_members, active_accounts, slurm_user_info, ["banette"])

        self.assertEqual(set([tuple(x) for x in commands]), set([tuple(x) for x in [
            shlex.split("/usr/bin/sacctmgr add user user6 Account=vo2 DefaultAccount=vo2 Cluster=banette"),
            shlex.split("/usr/bin/sacctmgr delete user name=user2 Cluster=banette"),
            shlex.split("/usr/bin/sacctmgr add user user3 Account=vo1 DefaultAccount=vo1 Cluster=banette"),
            shlex.split("/usr/bin/sacctmgr delete user name=user3 Account=vo2 where Cluster=banette"),
            shlex.split("/usr/bin/sacctmgr add user user4 Account=vo2 DefaultAccount=vo2 Cluster=banette"),
            shlex.split("/usr/bin/sacctmgr delete user name=user4 Account=vo1 where Cluster=banette"),
        ]]))
def main():
    """
    Main script. The usual.
    """

    options = {
        "nagios-check-interval-threshold":
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        "access_token": ("OAuth2 token to access the account page REST API",
                         None, "store", None),
        "account_page_url": (
            "URL of the account page where we can find the REST API",
            str,
            "store",
            "https://apivsc.ugent.be/django",
        ),
        'host_institute':
        ('Name of the institute where this script is being run', str, 'store',
         GENT),
        "clusters": (
            "Cluster(s) (comma-separated) to sync for. "
            "Overrides <host_institute>_SLURM_COMPUTE_CLUSTERS that are in production.",
            "strlist",
            "store",
            [],
        ),
        'start_timestamp':
        ('Timestamp to start the sync from', str, 'store', None),
        'cluster_classes':
        ('Classes of clusters that should be synced, comma-separated',
         "strlist", 'store', [PRODUCTION, PILOT])
    }

    opts = ExtendedSimpleOption(options)
    stats = {}

    (last_timestamp, start_time) = retrieve_timestamp_with_default(
        SYNC_TIMESTAMP_FILENAME, start_timestamp=opts.options.start_timestamp)
    logging.info("Using timestamp %s", last_timestamp)
    logging.info("Using startime %s", start_time)

    try:
        client = AccountpageClient(token=opts.options.access_token,
                                   url=opts.options.account_page_url + "/api/")
        host_institute = opts.options.host_institute

        slurm_account_info = get_slurm_acct_info(SyncTypes.accounts)
        slurm_user_info = get_slurm_acct_info(SyncTypes.users)

        logging.debug("%d accounts found", len(slurm_account_info))
        logging.debug("%d users found", len(slurm_user_info))

        if opts.options.clusters:
            clusters = opts.options.clusters
        else:
            clusters = [
                cs for p in opts.options.cluster_classes
                for cs in VSC_SLURM_CLUSTERS[host_institute][p]
            ]
        sacctmgr_commands = []

        # All users belong to a VO, so fetching the VOs is necessary/
        account_page_vos = [
            mkVo(v)
            for v in client.vo.institute[opts.options.host_institute].get()[1]
        ]

        # make sure the institutes and the default accounts (VOs) are there for each cluster
        institute_vos = dict([
            (v.vsc_id, v) for v in account_page_vos
            if v.vsc_id in INSTITUTE_VOS_BY_INSTITUTE[host_institute].values()
        ])
        sacctmgr_commands += slurm_institute_accounts(slurm_account_info,
                                                      clusters, host_institute,
                                                      institute_vos)

        # The VOs do not track active state of users, so we need to fetch all accounts as well
        active_accounts = set(
            [a["vsc_id"] for a in client.account.get()[1] if a["isactive"]])

        # dictionary mapping the VO vsc_id on a tuple with the VO members and the VO itself
        account_page_members = dict([(vo.vsc_id, (set(vo.members), vo))
                                     for vo in account_page_vos])

        # process all regular VOs
        sacctmgr_commands += slurm_vo_accounts(account_page_vos,
                                               slurm_account_info, clusters,
                                               host_institute)

        # process VO members
        sacctmgr_commands += slurm_user_accounts(account_page_members,
                                                 active_accounts,
                                                 slurm_user_info, clusters,
                                                 opts.options.dry_run)

        logging.info("Executing %d commands", len(sacctmgr_commands))

        if opts.options.dry_run:
            print("Commands to be executed:\n")
            print("\n".join([" ".join(c) for c in sacctmgr_commands]))
        else:
            execute_commands(sacctmgr_commands)

        if not opts.options.dry_run:
            (_, ldap_timestamp) = convert_timestamp(start_time)
            write_timestamp(SYNC_TIMESTAMP_FILENAME, ldap_timestamp)
            opts.epilogue("Accounts synced to slurm", stats)
        else:
            logging.info("Dry run done")

    except Exception as err:
        logging.exception("critical exception caught: %s", err)
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
def main():
    """
    Main script. The usual.
    """

    options = {
        "nagios-check-interval-threshold":
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        "access_token": ("OAuth2 token to access the account page REST API",
                         None, "store", None),
        "account_page_url": (
            "URL of the account page where we can find the REST API",
            str,
            "store",
            "https://apivsc.ugent.be/django",
        ),
        "clusters": (
            "Cluster(s) (comma-separated) to sync for. "
            "Overrides GENT_SLURM_COMPUTE_CLUSTERS that are in production.",
            str,
            "store",
            None,
        ),
    }

    opts = ExtendedSimpleOption(options)
    stats = {}

    try:
        client = AccountpageClient(token=opts.options.access_token,
                                   url=opts.options.account_page_url + "/api/")

        last_timestamp = "201804010000Z"  # the beginning of time

        logging.info("Last recorded timestamp was %s" % (last_timestamp))

        slurm_account_info = get_slurm_acct_info(SyncTypes.accounts)
        slurm_user_info = get_slurm_acct_info(SyncTypes.users)

        logging.debug("%d accounts found", len(slurm_account_info))
        logging.debug("%d users found", len(slurm_user_info))

        if opts.options.clusters is not None:
            clusters = opts.options.clusters.split(",")
        else:
            clusters = [
                c for c in GENT_SLURM_COMPUTE_CLUSTERS
                if c in GENT_PRODUCTION_COMPUTE_CLUSTERS
            ]

        sacctmgr_commands = []

        # make sure the institutes and the default accounts (VOs) are there for each cluster
        sacctmgr_commands += slurm_institute_accounts(slurm_account_info,
                                                      clusters)

        # All users belong to a VO, so fetching the VOs is necessary/
        account_page_vos = [mkVo(v) for v in client.vo.get()[1]]

        # The VOs do not track active state of users, so we need to fetch all accounts as well
        active_accounts = set(
            [a["vsc_id"] for a in client.account.get()[1] if a["isactive"]])

        # dictionary mapping the VO vsc_id on a tuple with the VO members and the VO itself
        account_page_members = dict([(vo.vsc_id, (set(vo.members), vo))
                                     for vo in account_page_vos])

        # process all regular VOs
        sacctmgr_commands += slurm_vo_accounts(account_page_vos,
                                               slurm_account_info, clusters)

        # process VO members
        sacctmgr_commands += slurm_user_accounts(account_page_members,
                                                 active_accounts,
                                                 slurm_user_info, clusters,
                                                 opts.options.dry_run)

        logging.info("Executing %d commands", len(sacctmgr_commands))

        if opts.options.dry_run:
            print("Commands to be executed:\n")
            print("\n".join([" ".join(c) for c in sacctmgr_commands]))
        else:
            execute_commands(sacctmgr_commands)

    except Exception as err:
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)

    if not opts.options.dry_run:
        opts.epilogue("Accounts synced to slurm", stats)
    else:
        logger.info("Dry run done")