def update_vo_status(vo):
    """Make sure the rest of the subsystems know that the VO status has changed.

    Currently, this is tailored to our LDAP-based setup.

    - in dry-run mode: only log the current status, change nothing
    - if the status is NEW, MODIFIED or MODIFY: patch the status to ACTIVE
      through the REST client and verify the returned VO is indeed ACTIVE
    - otherwise, the VO is left untouched, and we simply have an idempotent
      script.

    :param vo: object exposing ``dry_run``, ``vo_id``, ``rest_client`` and
        ``vo`` (the latter carrying a ``status`` attribute).
    :raises VoStatusUpdateError: if the PATCH request fails with an HTTPError.
    :raises UserStatusUpdateError: if the PATCH went through but the returned
        VO is not ACTIVE.
    """
    if vo.dry_run:
        logging.info("VO %s has status %s. Dry-run so not changing anything", vo.vo_id, vo.vo.status)
        return

    if vo.vo.status not in (NEW, MODIFIED, MODIFY):
        logging.info("VO %s has status %s, not changing", vo.vo_id, vo.vo.status)
        return

    payload = {"status": ACTIVE}
    try:
        response = vo.rest_client.vo[vo.vo_id].patch(body=payload)
    except HTTPError as err:
        logging.error("VO %s status was not changed", vo.vo_id)
        # The message has two placeholders, so both values must be supplied;
        # formatting with err.code alone raised a TypeError that hid the
        # actual HTTP failure.
        raise VoStatusUpdateError(
            "Vo %s status was not changed - received HTTP code %d" % (vo.vo_id, err.code))
    else:
        virtual_organisation = mkVo(response)
        if virtual_organisation.status == ACTIVE:
            logging.info("VO %s status changed to %s", vo.vo_id, ACTIVE)
        else:
            logging.error("VO %s status was not changed", vo.vo_id)
            # NOTE(review): the exception type is UserStatusUpdateError even
            # though this concerns a VO; kept as-is so existing callers that
            # catch it keep working.
            raise UserStatusUpdateError(
                "VO %s status was not changed, still at %s" % (vo.vo_id, virtual_organisation.status))
def vo(self):
    """Return the VO for self.vo_id, fetching it through the REST client on first use.

    The result of mkVo is kept in self._vo_cache; as long as that cache holds a
    truthy value it is returned without a new REST call.
    """
    if self._vo_cache:
        return self._vo_cache

    fetch_vo = self.rest_client.vo[self.vo_id].get
    failure_message = "Could not get VO from accountpage for VO %s" % self.vo_id
    rest_reply = whenHTTPErrorRaise(fetch_vo, failure_message)
    # Element 1 of the reply is what gets handed to mkVo.
    self._vo_cache = mkVo(rest_reply[1])
    return self._vo_cache
def sync_altered_groups(self, last, dry_run=True):
    """
    Synchronise the groups (usergroups included) modified since `last` back to LDAP.

    Returns a dict mapping NEW, UPDATED and ERROR to the set of group vsc_ids for
    which add_or_update reported that result.
    """
    modified_groups = []
    for raw_group in self.client.allgroups.modified[last].get()[1]:
        modified_groups.append(mkGroup(raw_group))

    stamp_format = "%Y%m%d%H%M%SZ"
    logging.info("Found %d modified groups in the range %s until %s" % (
        len(modified_groups),
        datetime.fromtimestamp(last).strftime(stamp_format),
        self.now.strftime(stamp_format),
    ))
    logging.debug("Modified groups: %s", [grp.vsc_id for grp in modified_groups])

    groups = dict((outcome, set()) for outcome in (NEW, UPDATED, ERROR))

    for group in modified_groups:
        try:
            vo = mkVo(self.client.vo[group.vsc_id].get()[1])
        except HTTPError as err:
            # A 404 means the group is simply not a VO; anything else is a real failure.
            if err.code != 404:
                raise
            vo = False

        ldap_attributes = {
            'cn': str(group.vsc_id),
            'institute': [str(group.institute['site'])],
            'gidNumber': ["%d" % (group.vsc_id_number, )],
            'moderator': list(map(str, group.moderators)),
            'memberUid': list(map(str, group.members)),
            'status': [str(group.status)],
        }

        if vo:
            # Extra attributes that only exist for VOs
            vo_attributes = (
                ('fairshare', ["%d" % (vo.fairshare, )]),
                ('description', [str(vo.description)]),
                ('dataDirectory', [str(vo.data_path)]),
                ('scratchDirectory', [str(vo.scratch_path)]),
            )
            for attribute_name, attribute_value in vo_attributes:
                ldap_attributes[attribute_name] = attribute_value

            # vsc40024 is moderator for all institute vo's
            if vo.vsc_id in INSTITUTE_VOS_GENT.values():
                ldap_attributes['moderator'] = ['vsc40024']

        logging.debug("Proposed changes for group %s: %s", group.vsc_id, ldap_attributes)

        outcome = self.add_or_update(VscLdapGroup, group.vsc_id, ldap_attributes, dry_run)
        groups[outcome].add(group.vsc_id)

    return groups
def main():
    """
    Main script: sync the account page VOs and their members to slurm.

    Builds the sacctmgr commands for the institute accounts, the VO accounts and
    the user accounts. In dry-run mode the commands are only printed; otherwise
    they are executed, the sync timestamp is written and the epilogue is run.
    Any exception is logged, reported as critical and ends the script with
    NAGIOS_EXIT_CRITICAL.
    """
    options = {
        "nagios-check-interval-threshold": NAGIOS_CHECK_INTERVAL_THRESHOLD,
        "access_token": ("OAuth2 token to access the account page REST API", None, "store", None),
        "account_page_url": (
            "URL of the account page where we can find the REST API",
            str,
            "store",
            "https://apivsc.ugent.be/django",
        ),
        "host_institute": ("Name of the institute where this script is being run", str, "store", GENT),
        "clusters": (
            "Cluster(s) (comma-separated) to sync for. "
            "Overrides <host_institute>_SLURM_COMPUTE_CLUSTERS that are in production.",
            "strlist",
            "store",
            [],
        ),
        "start_timestamp": ("Timestamp to start the sync from", str, "store", None),
        "cluster_classes": (
            "Classes of clusters that should be synced, comma-separated",
            "strlist",
            "store",
            [PRODUCTION, PILOT],
        ),
    }

    opts = ExtendedSimpleOption(options)
    stats = {}

    (last_timestamp, start_time) = retrieve_timestamp_with_default(
        SYNC_TIMESTAMP_FILENAME,
        start_timestamp=opts.options.start_timestamp,
    )
    logging.info("Using timestamp %s", last_timestamp)
    logging.info("Using startime %s", start_time)

    try:
        client = AccountpageClient(token=opts.options.access_token, url=opts.options.account_page_url + "/api/")
        host_institute = opts.options.host_institute

        slurm_account_info = get_slurm_acct_info(SyncTypes.accounts)
        slurm_user_info = get_slurm_acct_info(SyncTypes.users)

        logging.debug("%d accounts found", len(slurm_account_info))
        logging.debug("%d users found", len(slurm_user_info))

        # Explicitly requested clusters win over the per-class cluster lists
        if opts.options.clusters:
            clusters = opts.options.clusters
        else:
            clusters = []
            for cluster_class in opts.options.cluster_classes:
                clusters.extend(VSC_SLURM_CLUSTERS[host_institute][cluster_class])

        sacctmgr_commands = []

        # All users belong to a VO, so fetching the VOs is necessary/
        account_page_vos = [mkVo(raw_vo) for raw_vo in client.vo.institute[opts.options.host_institute].get()[1]]

        # make sure the institutes and the default accounts (VOs) are there for each cluster
        institute_vos = {
            candidate.vsc_id: candidate
            for candidate in account_page_vos
            if candidate.vsc_id in INSTITUTE_VOS_BY_INSTITUTE[host_institute].values()
        }
        sacctmgr_commands.extend(
            slurm_institute_accounts(slurm_account_info, clusters, host_institute, institute_vos))

        # The VOs do not track active state of users, so we need to fetch all accounts as well
        active_accounts = {account["vsc_id"] for account in client.account.get()[1] if account["isactive"]}

        # dictionary mapping the VO vsc_id on a tuple with the VO members and the VO itself
        account_page_members = {
            page_vo.vsc_id: (set(page_vo.members), page_vo)
            for page_vo in account_page_vos
        }

        # process all regular VOs
        sacctmgr_commands.extend(
            slurm_vo_accounts(account_page_vos, slurm_account_info, clusters, host_institute))

        # process VO members
        sacctmgr_commands.extend(
            slurm_user_accounts(
                account_page_members, active_accounts, slurm_user_info, clusters, opts.options.dry_run))

        logging.info("Executing %d commands", len(sacctmgr_commands))

        if opts.options.dry_run:
            print("Commands to be executed:\n")
            print("\n".join([" ".join(command) for command in sacctmgr_commands]))
            logging.info("Dry run done")
        else:
            execute_commands(sacctmgr_commands)
            # Only a real run moves the stored sync timestamp forward
            (_, ldap_timestamp) = convert_timestamp(start_time)
            write_timestamp(SYNC_TIMESTAMP_FILENAME, ldap_timestamp)
            opts.epilogue("Accounts synced to slurm", stats)

    except Exception as err:
        logging.exception("critical exception caught: %s", err)
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
return if vo.vo.status not in (NEW, MODIFIED, MODIFY): logging.info("VO %s has status %s, not changing" % (vo.vo_id, vo.vo.status)) return payload = {"status": ACTIVE} try: response = client.vo[vo.vo_id].patch(body=payload) except HTTPError, err: logging.error("VO %s status was not changed", vo.vo_id) raise VoStatusUpdateError( "Vo %s status was not changed - received HTTP code %d" % err.code) else: virtual_organisation = mkVo(response) if virtual_organisation.status == ACTIVE: logging.info("VO %s status changed to %s" % (vo.vo_id, ACTIVE)) else: logging.error("VO %s status was not changed", vo.vo_id) raise UserStatusUpdateError( "VO %s status was not changed, still at %s" % (vo.vo_id, virtual_organisation.status)) def process_vos(options, vo_ids, storage_name, client, datestamp, host_institute=None):
def sync_altered_groups(self, last, dry_run=True):
    """
    Synchronise the groups (usergroups included) modified since `last` back to LDAP.

    Returns a dict mapping NEW, UPDATED and ERROR to the set of group vsc_ids for
    which add_or_update reported that result.
    """
    changed_groups = list(map(mkGroup, self.client.allgroups.modified[last].get()[1]))

    stamp_format = "%Y%m%d%H%M%SZ"
    logging.info("Found %d modified groups in the range %s until %s" % (
        len(changed_groups),
        datetime.fromtimestamp(last).strftime(stamp_format),
        self.now.strftime(stamp_format),
    ))
    logging.debug("Modified groups: %s", [grp.vsc_id for grp in changed_groups])

    groups = dict((outcome, set()) for outcome in (NEW, UPDATED, ERROR))

    for group in changed_groups:

        # General group attributes
        moderators = list(map(str, group.moderators))
        institute = str(group.institute['name'])

        ldap_attributes = {
            'cn': str(group.vsc_id),
            'institute': [institute],
            'gidNumber': ["%d" % (group.vsc_id_number, )],
            'moderator': moderators,
            'status': [str(group.status)],
        }

        # memberUid is only set for groups that actually have members:
        # - adding a new group record to LDAP fails with an empty memberUid
        # - an existing LDAP record of a group that became empty loses memberUid, because
        #   ldap.modlist.modifyModlist (vsc-ldap) deletes attributes missing from the new record
        if group.members:
            ldap_attributes['memberUid'] = list(map(str, group.members))

        # VO attributes
        try:
            vo = mkVo(self.client.vo[group.vsc_id].get()[1])
        except HTTPError as err:
            # A 404 means the group is not a VO, so there is nothing to add.
            # Anything else is an unexpected failure.
            if err.code != 404:
                # NOTE(review): raiseException is not part of the stdlib logging module;
                # presumably `logging` is bound to a fancylogger-style logger here -- confirm.
                logging.raiseException("Retrieval of group VO failed for unexpected reasons")
        else:
            # Group is a VO
            vo_attributes = (
                ('fairshare', ["%d" % (vo.fairshare, )]),
                ('description', [str(vo.description)]),
                ('dataDirectory', [str(vo.data_path)]),
                ('scratchDirectory', [str(vo.scratch_path)]),
            )
            for attribute_name, attribute_value in vo_attributes:
                ldap_attributes[attribute_name] = attribute_value

            # Set institute moderator for main VOs
            if vo.vsc_id in DEFAULT_VOS_ALL:
                ldap_attributes['moderator'] = [VSC_CONFIG.vo_group_mods[group.institute['name']]]
                logging.info("Using VO moderator %s for VO %s", ldap_attributes['moderator'], group.vsc_id)

        # Fall back to the institute's backup moderator when no moderator is left
        if not ldap_attributes['moderator']:
            ldap_attributes['moderator'] = [str(VSC_CONFIG.backup_group_mods[group.institute['name']])]
            logging.info("Using backup moderator %s for group %s", ldap_attributes['moderator'], group.vsc_id)

        logging.debug("Proposed changes for group %s: %s", group.vsc_id, ldap_attributes)

        outcome = self.add_or_update(VscLdapGroup, group.vsc_id, ldap_attributes, dry_run)
        groups[outcome].add(group.vsc_id)

    return groups
def main():
    """
    Main script: sync the account page VOs and their members to slurm.

    Builds the sacctmgr commands for the institute accounts, the VO accounts and
    the user accounts, then prints them (dry-run) or executes them. Any exception
    is logged, reported as critical and ends the script with NAGIOS_EXIT_CRITICAL.
    After a successful real run the epilogue is executed.
    """
    options = {
        "nagios-check-interval-threshold": NAGIOS_CHECK_INTERVAL_THRESHOLD,
        "access_token": ("OAuth2 token to access the account page REST API", None, "store", None),
        "account_page_url": (
            "URL of the account page where we can find the REST API",
            str,
            "store",
            "https://apivsc.ugent.be/django",
        ),
        "clusters": (
            "Cluster(s) (comma-separated) to sync for. "
            "Overrides GENT_SLURM_COMPUTE_CLUSTERS that are in production.",
            str,
            "store",
            None,
        ),
    }

    opts = ExtendedSimpleOption(options)
    stats = {}

    try:
        client = AccountpageClient(token=opts.options.access_token, url=opts.options.account_page_url + "/api/")

        last_timestamp = "201804010000Z"  # the beginning of time

        logging.info("Last recorded timestamp was %s" % (last_timestamp))

        slurm_account_info = get_slurm_acct_info(SyncTypes.accounts)
        slurm_user_info = get_slurm_acct_info(SyncTypes.users)

        logging.debug("%d accounts found", len(slurm_account_info))
        logging.debug("%d users found", len(slurm_user_info))

        if opts.options.clusters is None:
            # No explicit selection: take the compute clusters that are in production
            clusters = []
            for cluster in GENT_SLURM_COMPUTE_CLUSTERS:
                if cluster in GENT_PRODUCTION_COMPUTE_CLUSTERS:
                    clusters.append(cluster)
        else:
            clusters = opts.options.clusters.split(",")

        sacctmgr_commands = []

        # make sure the institutes and the default accounts (VOs) are there for each cluster
        sacctmgr_commands.extend(slurm_institute_accounts(slurm_account_info, clusters))

        # All users belong to a VO, so fetching the VOs is necessary/
        account_page_vos = [mkVo(raw_vo) for raw_vo in client.vo.get()[1]]

        # The VOs do not track active state of users, so we need to fetch all accounts as well
        active_accounts = {account["vsc_id"] for account in client.account.get()[1] if account["isactive"]}

        # dictionary mapping the VO vsc_id on a tuple with the VO members and the VO itself
        account_page_members = {
            page_vo.vsc_id: (set(page_vo.members), page_vo)
            for page_vo in account_page_vos
        }

        # process all regular VOs
        sacctmgr_commands.extend(slurm_vo_accounts(account_page_vos, slurm_account_info, clusters))

        # process VO members
        sacctmgr_commands.extend(
            slurm_user_accounts(
                account_page_members, active_accounts, slurm_user_info, clusters, opts.options.dry_run))

        logging.info("Executing %d commands", len(sacctmgr_commands))

        if opts.options.dry_run:
            print("Commands to be executed:\n")
            print("\n".join([" ".join(command) for command in sacctmgr_commands]))
        else:
            execute_commands(sacctmgr_commands)

    except Exception as err:
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)

    if opts.options.dry_run:
        logger.info("Dry run done")
    else:
        opts.epilogue("Accounts synced to slurm", stats)