Code example #1
    def __init__(self,
                 vo_id,
                 storage=None,
                 rest_client=None,
                 host_institute=GENT):
        """Initialise"""
        super(VscTier2AccountpageVo, self).__init__(vo_id, rest_client)

        self.vo_id = vo_id
        self.vsc = VSC()
        self.host_institute = host_institute

        if not storage:
            self.storage = VscStorage()
        else:
            self.storage = storage

        self.gpfs = GpfsOperations()
        self.posix = PosixOperations()

        self.dry_run = False

        self._vo_data_quota_cache = None
        self._vo_data_shared_quota_cache = None
        self._vo_scratch_quota_cache = None
        self._institute_quota_cache = None

        self._sharing_group_cache = None
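
A minimal usage sketch of the constructor above (VO ID hypothetical; client stands in for an authenticated AccountpageClient instance):

# Hypothetical usage of VscTier2AccountpageVo.
vo = VscTier2AccountpageVo('gvo00002', rest_client=client)
vo.dry_run = True  # attribute initialised to False in __init__ above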
Code example #2
File: inode_log.py  Project: boegel/vsc-filesystems
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'location': ('path to store the gzipped files', None, 'store', INODE_LOG_ZIP_PATH),
    }

    opts = ExtendedSimpleOption(options)

    stats = {}

    try:
        gpfs = GpfsOperations()
        filesets = gpfs.list_filesets()

        if not os.path.exists(opts.options.location):
            os.makedirs(opts.options.location, 0o755)

        critical_filesets = dict()

        for filesystem in filesets:
            stats["%s_inodes_log_critical" % (filesystem,)] = INODE_STORE_LOG_CRITICAL
            try:
                filename = "gpfs_inodes_%s_%s.gz" % (time.strftime("%Y%m%d-%H:%M"), filesystem)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(filesets[filesystem]).encode())
                zipfile.close()
                stats["%s_inodes_log" % (filesystem,)] = 0
                logger.info("Stored inodes information for FS %s" % (filesystem))

                cfs = process_inodes_information(filesets[filesystem])
                logger.info("Processed inodes information for filesystem %s" % (filesystem,))
                if cfs:
                    critical_filesets[filesystem] = cfs
                    logger.info("Filesystem %s has at least %d filesets reaching the limit" % (filesystem, len(cfs)))

            except Exception:
                stats["%s_inodes_log" % (filesystem,)] = 1
                logger.exception("Failed storing inodes information for FS %s" % (filesystem))

        logger.info("Critical filesets: %s" % (critical_filesets,))

        if critical_filesets:
            mail_admins(critical_filesets, opts.options.dry_run)

    except Exception:
        logger.exception("Failure obtaining GPFS inodes")
        opts.critical("Failure to obtain GPFS inodes information")
        sys.exit(NAGIOS_EXIT_CRITICAL)

    opts.epilogue("Logged GPFS inodes", stats)
Code example #3
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'location':
        ('path to store the gzipped files', None, 'store', QUOTA_LOG_ZIP_PATH),
        'backend': ('Storage backend', None, 'store', 'gpfs'),
    }

    opts = ExtendedSimpleOption(options)

    stats = {}

    backend = opts.options.backend
    try:
        if backend == 'gpfs':
            storage_backend = GpfsOperations()
        elif backend == 'lustre':
            storage_backend = LustreOperations()
        else:
            logger.exception("Backend %s not supported", backend)

        quota = storage_backend.list_quota()

        if not os.path.exists(opts.options.location):
            os.makedirs(opts.options.location, 0o755)

        for key in quota:
            stats["%s_quota_log_critical" % (key, )] = QUOTA_STORE_LOG_CRITICAL
            try:
                filename = "%s_quota_%s_%s.gz" % (
                    backend, time.strftime("%Y%m%d-%H:%M"), key)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(quota[key]).encode())
                zipfile.close()
                stats["%s_quota_log" % (key, )] = 0
                logger.info("Stored quota information for FS %s", key)
            except Exception:
                stats["%s_quota_log" % (key, )] = 1
                logger.exception("Failed storing quota information for FS %s",
                                 key)
    except Exception:
        logger.exception("Failure obtaining %s quota", backend)
        opts.critical("Failure to obtain %s quota information" % backend)

    opts.epilogue("Logged %s quota" % backend, stats)
Code example #4
def main():

    storage_settings = VscStorage()

    local_storage_conf = configparser.ConfigParser()
    local_storage_conf.read(QUOTA_CONF_FILE)

    gpfs = GpfsOperations()
    gpfs.list_filesystems()
    gpfs.list_filesets()

    for storage_name in local_storage_conf.get('MAIN', 'storage').split(','):

        filesystem_name = storage_settings[storage_name].filesystem
        filesystem_info = gpfs.get_filesystem_info(filesystem_name)

        if storage_name in ('VSC_HOME',):
            set_up_filesystem(gpfs, storage_settings, storage_name,
                              filesystem_info, filesystem_name)
            set_up_apps(gpfs, storage_settings, storage_name, filesystem_info,
                        filesystem_name)
        else:
            set_up_filesystem(gpfs,
                              storage_settings,
                              storage_name,
                              filesystem_info,
                              filesystem_name,
                              vo_support=True)
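
For reference, a minimal sketch of the configuration the script above expects in QUOTA_CONF_FILE (storage names hypothetical):

# Hypothetical QUOTA_CONF_FILE contents consumed by the loop above:
#
#   [MAIN]
#   storage = VSC_HOME,VSC_DATA,VSC_SCRATCH_KYUKON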
Code example #5
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'location':
        ('path to store the gzipped files', None, 'store', QUOTA_LOG_ZIP_PATH),
    }

    opts = ExtendedSimpleOption(options)

    stats = {}

    try:
        gpfs = GpfsOperations()
        quota = gpfs.list_quota()

        if not os.path.exists(opts.options.location):
            os.makedirs(opts.options.location, 0o755)

        for key in quota:
            stats["%s_quota_log_critical" % (key, )] = QUOTA_STORE_LOG_CRITICAL
            try:
                filename = "gpfs_quota_%s_%s.gz" % (
                    time.strftime("%Y%m%d-%H:%M"), key)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(quota[key]).encode())
                zipfile.close()
                stats["%s_quota_log" % (key, )] = 0
                logger.info("Stored quota information for FS %s" % (key))
            except Exception:
                stats["%s_quota_log" % (key, )] = 1
                logger.exception("Failed storing quota information for FS %s" %
                                 (key))
    except Exception:
        logger.exception("Failure obtaining GPFS quota")
        opts.critical("Failure to obtain GPFS quota information")
        sys.exit(NAGIOS_EXIT_CRITICAL)

    opts.epilogue("Logged GPFS quota", stats)
Code example #6
File: user.py  Project: stdweird/vsc-administration-1
    def __init__(self,
                 user_id,
                 storage=None,
                 pickle_storage=None,
                 rest_client=None,
                 account=None,
                 pubkeys=None,
                 host_institute=None,
                 use_user_cache=False):
        """
        Initialisation.
        @type user_id: string representing the user's VSC ID (vsc[0-9]{5})
        """
        super(VscTier2AccountpageUser,
              self).__init__(user_id,
                             rest_client,
                             account=account,
                             pubkeys=pubkeys,
                             use_user_cache=use_user_cache)

        # Move to vsc-config?
        default_pickle_storage = {
            GENT: VSC_SCRATCH_KYUKON,
            BRUSSEL: VSC_SCRATCH_THEIA,
        }

        if host_institute is None:
            host_institute = GENT
        self.host_institute = host_institute

        if pickle_storage is None:
            pickle_storage = default_pickle_storage[host_institute]

        self.pickle_storage = pickle_storage
        if storage is None:
            storage = VscStorage()

        self.institute_path_templates = storage.path_templates[
            self.host_institute]
        self.institute_storage = storage[self.host_institute]

        self.vsc = VSC()
        self.gpfs = GpfsOperations()  # Only used when needed
        self.posix = PosixOperations()
Code example #7
File: user.py  Project: wpoely86/vsc-administration
    def __init__(self, user_id, storage=None, pickle_storage='VSC_SCRATCH_KYUKON', rest_client=None,
                 account=None, pubkeys=None, host_institute=None, use_user_cache=False):
        """
        Initialisation.
        @type user_id: string representing the user's VSC ID (vsc[0-9]{5})
        """
        super(VscTier2AccountpageUser, self).__init__(user_id, rest_client, account=account,
                                                      pubkeys=pubkeys, use_user_cache=use_user_cache)

        self.pickle_storage = pickle_storage
        if not storage:
            self.storage = VscStorage()
        else:
            self.storage = storage

        self.vsc = VSC()
        self.gpfs = GpfsOperations()  # Only used when needed
        self.posix = PosixOperations()
        self.host_institute = host_institute
Code example #8
File: quota_log.py  Project: boegel/vsc-filesystems
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'location': ('path to store the gzipped files', None, 'store', QUOTA_LOG_ZIP_PATH),
    }

    opts = ExtendedSimpleOption(options)

    filesystem_error = 0
    filesystem_ok = 0
    error = False

    stats = {}

    try:
        gpfs = GpfsOperations()
        quota = gpfs.list_quota()

        if not os.path.exists(opts.options.location):
            os.makedirs(opts.options.location, 0o755)

        for key in quota:
            stats["%s_quota_log_critical" % (key,)] = QUOTA_STORE_LOG_CRITICAL
            try:
                filename = "gpfs_quota_%s_%s.gz" % (time.strftime("%Y%m%d-%H:%M"), key)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(quota[key]).encode())
                zipfile.close()
                stats["%s_quota_log" % (key,)] = 0
                logger.info("Stored quota information for FS %s" % (key))
            except Exception:
                stats["%s_quota_log" % (key,)] = 1
                logger.exception("Failed storing quota information for FS %s" % (key))
    except Exception:
        logger.exception("Failure obtaining GPFS quota")
        opts.critical("Failure to obtain GPFS quota information")
        sys.exit(NAGIOS_EXIT_CRITICAL)
Code example #9
def main():
    """
    Main script.
    - process the users and VOs
    - write the new timestamp if everything went OK
    - write the nagios check file
    """

    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'storage': ('storage systems on which to deploy users and vos', None,
                    'extend', []),
    }

    opts = ExtendedSimpleOption(options)
    stats = {}

    try:
        storage_settings = VscStorage()
        gpfs = GpfsOperations()
        gpfs.list_filesystems()
        gpfs.list_filesets()

        for storage_name in opts.options.storage:

            filesystem_name = storage_settings[storage_name].filesystem
            filesystem_info = gpfs.get_filesystem_info(filesystem_name)

            set_up_filesystem(gpfs,
                              storage_settings,
                              storage_name,
                              filesystem_info,
                              filesystem_name,
                              vo_support=True,
                              dry_run=opts.options.dry_run)

    except Exception as err:
        logging.exception("critical exception caught: %s", err)
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)

    opts.epilogue("UGent users and VOs synchronised", stats)
Code example #10
File: quota_log.py  Project: piojo/vsc-filesystems
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios': ('print out nagios information', None, 'store_true', False, 'n'),
        'nagios-check-filename': ('filename of where the nagios check data is stored',
                                  str,
                                  'store',
                                  NAGIOS_CHECK_FILENAME),
        'nagios-check-interval-threshold': ('threshold of nagios checks timing out',
                                            None,
                                            'store',
                                            NAGIOS_CHECK_INTERVAL_THRESHOLD),
        'location': ('path to store the gzipped files', None, 'store', QUOTA_LOG_ZIP_PATH),
        'ha': ('high-availability master IP address', None, 'store', None),
        'dry-run': ('do not make any updates whatsoever', None, 'store_true', False),
    }

    opts = simple_option(options)

    nagios_reporter = NagiosReporter(NAGIOS_HEADER,
                                     opts.options.nagios_check_filename,
                                     opts.options.nagios_check_interval_threshold)
    if opts.options.nagios:
        logger.debug("Producing Nagios report and exiting.")
        nagios_reporter.report_and_exit()
        sys.exit(0)  # not reached

    if not proceed_on_ha_service(opts.options.ha):
        logger.warning("Not running on the target host in the HA setup. Stopping.")
        nagios_reporter.cache(NAGIOS_EXIT_WARNING,
                              NagiosResult("Not running on the HA master."))
        sys.exit(NAGIOS_EXIT_WARNING)

    lockfile = TimestampedPidLockfile(QUOTA_LOG_LOCK_FILE)
    lock_or_bork(lockfile, nagios_reporter)

    logger.info("starting quota_log run")

    filesystem_error = 0
    filesystem_ok = 0
    error = False

    try:
        gpfs = GpfsOperations()
        quota = gpfs.list_quota()

        for key in quota:
            try:
                filename = "gpfs_quota_%s_%s.gz" % (time.strftime("%Y%m%d-%H:%M"), key)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(quota[key]).encode())
                zipfile.close()
                filesystem_ok += 1
                logger.info("Stored quota information for FS %s" % (key))
            except Exception:
                logger.exception("Failed storing quota information for FS %s" % (key))
                filesystem_error += 1
    except Exception:
        logger.exception("Failure obtaining GPFS quota")
        error = True
Code example #11
File: user.py  Project: wpoely86/vsc-administration
class VscTier2AccountpageUser(VscAccountPageUser):
    """
    A user on each of our Tier-2 system using the account page REST API
    to retrieve its information.
    """
    def __init__(self, user_id, storage=None, pickle_storage='VSC_SCRATCH_KYUKON', rest_client=None,
                 account=None, pubkeys=None, host_institute=None, use_user_cache=False):
        """
        Initialisation.
        @type user_id: string representing the user's VSC ID (vsc[0-9]{5})
        """
        super(VscTier2AccountpageUser, self).__init__(user_id, rest_client, account=account,
                                                      pubkeys=pubkeys, use_user_cache=use_user_cache)

        self.pickle_storage = pickle_storage
        if not storage:
            self.storage = VscStorage()
        else:
            self.storage = storage

        self.vsc = VSC()
        self.gpfs = GpfsOperations()  # Only used when needed
        self.posix = PosixOperations()
        self.host_institute = host_institute

    def _init_cache(self, **kwargs):
        super(VscTier2AccountpageUser, self)._init_cache(**kwargs)
        self._cache['quota'] = {}

    @property
    def user_home_quota(self):
        if not self._cache['quota']:
            self._init_quota_cache()
        return self._cache['quota']['home']

    @property
    def user_data_quota(self):
        if not self._cache['quota']:
            self._init_quota_cache()
        return self._cache['quota']['data']

    @property
    def user_scratch_quota(self):
        if not self._cache['quota']:
            self._init_quota_cache()
        return self._cache['quota']['scratch']

    @property
    def vo_data_quota(self):
        if not self._cache['quota']:
            self._init_quota_cache()
        return self._cache['quota']['vo']['data']

    @property
    def vo_scratch_quota(self):
        if not self._cache['quota']:
            self._init_quota_cache()
        return self._cache['quota']['vo']['scratch']

    def _init_quota_cache(self):
        if self.host_institute is None:
            logging.warning("_init_quota_cache with host_institute None")
        all_quota = [mkVscUserSizeQuota(q) for q in self.rest_client.account[self.user_id].quota.get()[1]]
        # we no longer set defaults, since we do not want to accidentally revert people to some default
        # that is lower than their actual quota if the accountpage goes down in between retrieving the users
        # and fetching the quota
        institute_quota = [q for q in all_quota if q.storage['institute'] == self.host_institute]
        fileset_name = self.vsc.user_grouping_fileset(self.account.vsc_id)

        def user_proposition(quota, storage_type):
            return quota.fileset == fileset_name and quota.storage['storage_type'] == storage_type

        # Non-UGent users who have quota in Gent, e.g., in a VO, should not have these set
        if self.person.institute['site'] == self.host_institute:
            self._cache['quota']['home'] = [q.hard for q in institute_quota if user_proposition(q, 'home')][0]
            self._cache['quota']['data'] = [q.hard for q in institute_quota
                                            if user_proposition(q, 'data')
                                            and not q.storage['name'].endswith('SHARED')][0]
            self._cache['quota']['scratch'] = [q for q in institute_quota if user_proposition(q, 'scratch')]
        else:
            self._cache['quota']['home'] = None
            self._cache['quota']['data'] = None
            self._cache['quota']['scratch'] = None

        fileset_name = 'gvo'

        def user_vo_proposition(quota, storage_type):
            return quota.fileset.startswith(fileset_name) and quota.storage['storage_type'] == storage_type

        self._cache['quota']['vo'] = {}
        self._cache['quota']['vo']['data'] = [q for q in institute_quota if user_vo_proposition(q, 'data')]
        self._cache['quota']['vo']['scratch'] = [q for q in institute_quota if user_vo_proposition(q, 'scratch')]

    def pickle_path(self):
        """Provide the location where to store pickle files for this user.

        This location is the user's path on the pickle_storage specified when creating
        a VscTier2AccountpageUser instance.
        """
        (path, _) = self.storage.path_templates[GENT][self.pickle_storage]['user'](self.account.vsc_id)
        return os.path.join(self.storage[self.pickle_storage].gpfs_mount_point, path)

    def _create_grouping_fileset(self, filesystem_name, path, fileset_name):
        """Create a fileset for a group of 100 user accounts

        - creates the fileset if it does not already exist
        """
        self.gpfs.list_filesets()
        logging.info("Trying to create the grouping fileset %s with link path %s", fileset_name, path)

        if not self.gpfs.get_fileset_info(filesystem_name, fileset_name):
            logging.info("Creating new fileset on %s with name %s and path %s", filesystem_name, fileset_name, path)
            base_dir_hierarchy = os.path.dirname(path)
            self.gpfs.make_dir(base_dir_hierarchy)
            self.gpfs.make_fileset(path, fileset_name)
        else:
            logging.info("Fileset %s already exists for user group of %s ... not creating again.",
                         fileset_name, self.account.vsc_id)

        self.gpfs.chmod(0o755, path)

    def _get_mount_path(self, storage_name, mount_point):
        """Get the mount point for the location we're running"""
        if mount_point == "login":
            mount_path = self.storage[storage_name].login_mount_point
        elif mount_point == "gpfs":
            mount_path = self.storage[storage_name].gpfs_mount_point
        else:
            logging.error("mount_point (%s) is not login or gpfs", mount_point)
            raise Exception("mount_point (%s) is not designated as gpfs or login" % (mount_point,))

        return mount_path

    def _get_path(self, storage_name, mount_point="gpfs"):
        """Get the path for the (if any) user directory on the given storage_name."""
        (path, _) = self.storage.path_templates[GENT][storage_name]['user'](self.account.vsc_id)
        return os.path.join(self._get_mount_path(storage_name, mount_point), path)

    def _get_grouping_path(self, storage_name, mount_point="gpfs"):
        """Get the path and the fileset for the user group directory (and associated fileset)."""
        (path, fileset) = self.storage.path_templates[GENT][storage_name]['user'](self.account.vsc_id)
        return (os.path.join(self._get_mount_path(storage_name, mount_point), os.path.dirname(path)), fileset)

    def _home_path(self, mount_point="gpfs"):
        """Return the path to the home dir."""
        return self._get_path(VSC_HOME, mount_point)

    def _data_path(self, mount_point="gpfs"):
        """Return the path to the data dir."""
        return self._get_path(VSC_DATA, mount_point)

    def _scratch_path(self, storage_name, mount_point="gpfs"):
        """Return the path to the scratch dir"""
        return self._get_path(storage_name, mount_point)

    def _grouping_home_path(self, mount_point="gpfs"):
        """Return the path to the grouping fileset for the users on data."""
        return self._get_grouping_path(VSC_HOME, mount_point)

    def _grouping_data_path(self, mount_point="gpfs"):
        """Return the path to the grouping fileset for the users on data."""
        return self._get_grouping_path(VSC_DATA, mount_point)

    def _grouping_scratch_path(self, storage_name, mount_point="gpfs"):
        """Return the path to the grouping fileset for the users on the given scratch filesystem."""
        return self._get_grouping_path(storage_name, mount_point)

    def _create_user_dir(self, grouping_f, path_f, storage_name):
        """Create the directories and files for some user location.
        
        @type grouping: function that yields the grouping path for the location.
        @type path: function that yields the actual path for the location.
        """
        try:
            (grouping_path, fileset) = grouping_f()
            self._create_grouping_fileset(self.storage[storage_name].filesystem, grouping_path, fileset)

            path = path_f()
            if self.gpfs.is_symlink(path):
                logging.warning("Trying to make a user dir, but a symlink already exists at %s", path)
                return

            create_stat_directory(
                path,
                0o700,
                int(self.account.vsc_id_number),
                int(self.usergroup.vsc_id_number),
                self.gpfs
            )
        except Exception:
            logging.exception("Could not create dir %s for user %s", path, self.account.vsc_id)
            raise

    def create_home_dir(self):
        """Create all required files in the (future) user's home directory."""
        self._create_user_dir(self._grouping_home_path, self._home_path, VSC_HOME)

    def create_data_dir(self):
        """Create the user's directory on the HPC data filesystem."""
        self._create_user_dir(self._grouping_data_path, self._data_path, VSC_DATA)

    def create_scratch_dir(self, storage_name):
        """Create the user's directory on the given scratch filesystem."""
        self._create_user_dir(
            lambda: self._grouping_scratch_path(storage_name),
            lambda: self._scratch_path(storage_name),
            storage_name)

    def _set_quota(self, storage_name, path, hard):
        """Set the given quota on the target path.

        @type path: path into a GPFS mount
        @type hard: hard limit
        """
        if not hard:
            logging.error("No user quota set for %s", storage_name)
            return

        quota = hard * 1024 * self.storage[storage_name].data_replication_factor
        soft = int(self.vsc.quota_soft_fraction * quota)
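        # Worked example (hypothetical numbers): with hard = 3145728 (3 GiB
        # expressed in KiB) and a data_replication_factor of 2, quota becomes
        # 3145728 * 1024 * 2 bytes, i.e. 6 GiB on disk; a soft fraction of
        # 0.9 then puts the soft limit at 90% of that.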

        logging.info("Setting quota for %s on %s to %d", storage_name, path, quota)

        # LDAP information is expressed in KiB, GPFS wants bytes.
        self.gpfs.set_user_quota(soft, int(self.account.vsc_id_number), path, quota)
        self.gpfs.set_user_grace(path, self.vsc.user_storage_grace_time)  # 7 days

    def set_home_quota(self):
        """Set USR quota on the home FS in the user fileset."""
        path = self._home_path()
        hard = self.user_home_quota
        self._set_quota(VSC_HOME, path, hard)

    def set_data_quota(self):
        """Set USR quota on the data FS in the user fileset."""
        (path, _) = self._grouping_data_path()
        hard = self.user_data_quota
        self._set_quota(VSC_DATA, path, hard)

    def set_scratch_quota(self, storage_name):
        """Set USR quota on the scratch FS in the user fileset."""
        quota = [q for q in self.user_scratch_quota if q.storage['name'] == storage_name]
        if not quota:
            logging.error("No scratch quota information available for %s", storage_name)
            return

        if self.storage[storage_name].user_grouping_fileset:
            (path, _) = self._grouping_scratch_path(storage_name)
        else:
            # Hack; this should actually become the link path of the fileset
            # that contains the path (the file, not the followed symlink)
            path = os.path.normpath(os.path.join(self._scratch_path(storage_name), '..'))

        self._set_quota(storage_name, path, quota[0].hard)

    def populate_home_dir(self):
        """Store the required files in the user's home directory.

        Does not overwrite files that may contain user defined content.
        """
        path = self._home_path()
        self.gpfs.populate_home_dir(int(self.account.vsc_id_number),
                                    int(self.usergroup.vsc_id_number),
                                    path,
                                    [p.pubkey for p in self.pubkeys])

    def __setattr__(self, name, value):
        """Override the setting of an attribute:

        - dry_run: set this here and in the gpfs and posix instance fields.
        - otherwise, call super's __setattr__()
        """

        if name == 'dry_run':
            self.gpfs.dry_run = value
            self.posix.dry_run = value

        super(VscTier2AccountpageUser, self).__setattr__(name, value)
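
A minimal driver sketch for this class (VSC ID and client hypothetical; setting dry_run is propagated to the gpfs and posix instances by the __setattr__ override above):

# Hypothetical usage: provision a user's home directory in dry-run mode.
user = VscTier2AccountpageUser('vsc40001', rest_client=client)
user.dry_run = True
user.create_home_dir()
user.set_home_quota()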
Code example #12
File: dquota.py  Project: boegel/vsc-filesystems
def main():
    """Main script"""

    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'storage': ('the VSC filesystems that are checked by this script', None, 'extend', []),
        'account_page_url': ('Base URL of the account page', None, 'store', 'https://account.vscentrum.be/django'),
        'access_token': ('OAuth2 token to access the account page REST API', None, 'store', None),
    }
    opts = ExtendedSimpleOption(options)

    try:
        opener = urllib.request.build_opener(urllib.request.HTTPHandler)
        access_token = opts.options.access_token

        user_id_map = map_uids_to_names()  # is this really necessary?
        LdapQuery(VscConfiguration())
        gpfs = GpfsOperations()
        storage = VscStorage()

        target_filesystems = [storage[s].filesystem for s in opts.options.storage]

        filesystems = gpfs.list_filesystems(target_filesystems).keys()
        logger.debug("Found the following GPFS filesystems: %s" % (filesystems))

        filesets = gpfs.list_filesets()
        logger.debug("Found the following GPFS filesets: %s" % (filesets))

        quota = gpfs.list_quota()
        exceeding_filesets = {}
        exceeding_users = {}
        stats = {}

        for storage_name in opts.options.storage:

            logger.info("Processing quota for storage_name %s" % (storage_name))
            filesystem = storage[storage_name].filesystem

            if filesystem not in filesystems:
                logger.error("Non-existant filesystem %s" % (filesystem))
                continue

            if filesystem not in quota.keys():
                logger.error("No quota defined for storage_name %s [%s]" % (storage_name, filesystem))
                continue

            quota_storage_map = get_mmrepquota_maps(quota[filesystem], storage_name, filesystem, filesets)

            exceeding_filesets[storage_name] = process_fileset_quota(storage,
                                                                     gpfs,
                                                                     storage_name,
                                                                     filesystem,
                                                                     quota_storage_map['FILESET'],
                                                                     opener,
                                                                     opts.options.account_page_url,
                                                                     access_token,
                                                                     opts.options.dry_run)
            exceeding_users[storage_name] = process_user_quota(storage,
                                                               gpfs,
                                                               storage_name,
                                                               filesystem,
                                                               quota_storage_map['USR'],
                                                               user_id_map,
                                                               opener,
                                                               opts.options.account_page_url,
                                                               access_token,
                                                               opts.options.dry_run)

            stats["%s_fileset_critical" % (storage_name,)] = QUOTA_FILESETS_CRITICAL
            if exceeding_filesets[storage_name]:
                stats["%s_fileset" % (storage_name,)] = 1
                logger.warning("storage_name %s found %d filesets that are exceeding their quota" % (storage_name,
                                                                                                len(exceeding_filesets)))
                for (e_fileset, e_quota) in exceeding_filesets[storage_name]:
                    logger.warning("%s has quota %s" % (e_fileset, str(e_quota)))
            else:
                stats["%s_fileset" % (storage_name,)] = 0
                logger.debug("storage_name %s found no filesets that are exceeding their quota" % storage_name)

            notify_exceeding_filesets(gpfs=gpfs,
                                      storage=storage_name,
                                      filesystem=filesystem,
                                      exceeding_items=exceeding_filesets[storage_name],
                                      dry_run=opts.options.dry_run)

            stats["%s_users_warning" % (storage_name,)] = QUOTA_USERS_WARNING
            stats["%s_users_critical" % (storage_name,)] = QUOTA_USERS_CRITICAL
            if exceeding_users[storage_name]:
                stats["%s_users" % (storage_name,)] = len(exceeding_users[storage_name])
                logger.warning("storage_name %s found %d users who are exceeding their quota" %
                               (storage_name, len(exceeding_users[storage_name])))
                for (e_user_id, e_quota) in exceeding_users[storage_name]:
                    logger.warning("%s has quota %s" % (e_user_id, str(e_quota)))
            else:
                stats["%s_users" % (storage_name,)] = 0
                logger.debug("storage_name %s found no users who are exceeding their quota" % storage_name)

            notify_exceeding_users(gpfs=gpfs,
                                   storage=storage_name,
                                   filesystem=filesystem,
                                   exceeding_items=exceeding_users[storage_name],
                                   dry_run=opts.options.dry_run)
    except Exception as err:
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
Code example #13
File: dshowq.py  Project: smoors/vsc-jobs
def main():
    # Collect all info

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'hosts':
        ('the hosts/clusters that should be contacted for job information',
         None, 'extend', []),
        'information': ('the sort of information to store: user, vo, project',
                        None, 'store', 'user'),
        'location': ('the location for storing the pickle file: delcatty, muk',
                     str, 'store', 'delcatty'),
        'account_page_url':
        ('the URL at which the account page resides', None, 'store', None),
        'access_token':
        ('the token that will allow authentication against the account page',
         None, 'store', None),
        'target_master':
        ('the master used to execute showq commands', None, 'store', None),
        'target_user': ('the user for ssh to the target master', None, 'store',
                        None),
    }

    opts = ExtendedSimpleOption(options)

    try:
        rest_client = AccountpageClient(token=opts.options.access_token)

        gpfs = GpfsOperations()
        storage = VscStorage()
        storage_name = cluster_user_pickle_store_map[opts.options.location]
        login_mount_point = storage[storage_name].login_mount_point
        gpfs_mount_point = storage[storage_name].gpfs_mount_point

        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            showq_path = opts.configfile_parser.get(host, "showq_path")
            clusters[host] = {'master': master, 'path': showq_path}

        logger.debug("clusters = %s" % (clusters, ))
        showq = SshShowq(opts.options.target_master,
                         opts.options.target_user,
                         clusters,
                         cache_pickle=True,
                         dry_run=opts.options.dry_run)

        logger.debug("Getting showq information ...")

        (queue_information, _, _) = showq.get_moab_command_information()
        timeinfo = time.time()

        active_users = queue_information.keys()

        logger.debug("Active users: %s" % (active_users))
        logger.debug("Queue information: %s" % (queue_information))

        # We need to determine which users should get an updated pickle. This depends on
        # - the active user set
        # - the information we want to provide on the cluster(set) where this script runs
        # At the same time, we need to determine the job information each user gets to see
        tup = (opts.options.information, active_users, queue_information,
               rest_client)
        (target_users, target_queue_information,
         user_map) = determine_target_information(*tup)

        nagios_user_count = 0
        nagios_no_store = 0

        stats = {}

        for user in target_users:
            try:
                path = get_pickle_path(opts.options.location, user,
                                       rest_client)
                user_queue_information = target_queue_information[user]
                user_queue_information['timeinfo'] = timeinfo
                store_on_gpfs(user, path, "showq",
                              (user_queue_information, user_map[user]), gpfs,
                              login_mount_point, gpfs_mount_point,
                              ".showq.json.gz", opts.options.dry_run)
                nagios_user_count += 1
            except Exception:
                logger.error("Could not store pickle file for user %s" %
                             (user))
                nagios_no_store += 1

        stats["store_users"] = nagios_user_count
        stats["store_fail"] = nagios_no_store
        stats["store_fail_critical"] = STORE_LIMIT_CRITICAL
    except Exception as err:
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
Code example #14
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'location':
        ('path to store the gzipped files', None, 'store', INODE_LOG_ZIP_PATH),
        'backend': ('Storage backend', None, 'store', 'gpfs'),
        'host_institute':
        ('Name of the institute where this script is being run', str, 'store',
         GENT),
    }

    opts = ExtendedSimpleOption(options)
    logger = opts.log

    stats = {}

    backend = opts.options.backend
    try:
        if backend == 'gpfs':
            storage_backend = GpfsOperations()
        elif backend == 'lustre':
            storage_backend = LustreOperations()
        else:
            logger.exception("Backend %s not supported" % backend)

        filesets = storage_backend.list_filesets()
        quota = storage_backend.list_quota()

        if not os.path.exists(opts.options.location):
            os.makedirs(opts.options.location, 0o755)

        critical_filesets = dict()

        for filesystem in filesets:
            stats["%s_inodes_log_critical" %
                  (filesystem, )] = INODE_STORE_LOG_CRITICAL
            try:
                filename = "%s_inodes_%s_%s.gz" % (
                    backend, time.strftime("%Y%m%d-%H:%M"), filesystem)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(filesets[filesystem]).encode())
                zipfile.close()
                stats["%s_inodes_log" % (filesystem, )] = 0
                logger.info("Stored inodes information for FS %s" %
                            (filesystem))

                cfs = process_inodes_information(filesets[filesystem],
                                                 quota[filesystem]['FILESET'],
                                                 threshold=0.9,
                                                 storage=backend)
                logger.info("Processed inodes information for filesystem %s" %
                            (filesystem, ))
                if cfs:
                    critical_filesets[filesystem] = cfs
                    logger.info(
                        "Filesystem %s has at least %d filesets reaching the limit"
                        % (filesystem, len(cfs)))

            except Exception:
                stats["%s_inodes_log" % (filesystem, )] = 1
                logger.exception(
                    "Failed storing inodes information for FS %s" %
                    (filesystem))

        logger.info("Critical filesets: %s" % (critical_filesets, ))

        if critical_filesets:
            mail_admins(critical_filesets,
                        dry_run=opts.options.dry_run,
                        host_institute=opts.options.host_institute)

    except Exception:
        logger.exception("Failure obtaining %s inodes" % backend)
        opts.critical("Failure to obtain %s inodes information" % backend)

    opts.epilogue("Logged %s inodes" % backend, stats)
Code example #15
def main():
    """The main."""

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'location':
        ('path to store the gzipped files', None, 'store', INODE_LOG_ZIP_PATH),
    }

    opts = ExtendedSimpleOption(options)
    logger = opts.log

    stats = {}

    try:
        gpfs = GpfsOperations()
        filesets = gpfs.list_filesets()
        quota = gpfs.list_quota()

        if not os.path.exists(opts.options.location):
            os.makedirs(opts.options.location, 0o755)

        critical_filesets = dict()

        for filesystem in filesets:
            stats["%s_inodes_log_critical" %
                  (filesystem, )] = INODE_STORE_LOG_CRITICAL
            try:
                filename = "gpfs_inodes_%s_%s.gz" % (
                    time.strftime("%Y%m%d-%H:%M"), filesystem)
                path = os.path.join(opts.options.location, filename)
                zipfile = gzip.open(path, 'wb', 9)  # Compress to the max
                zipfile.write(json.dumps(filesets[filesystem]).encode())
                zipfile.close()
                stats["%s_inodes_log" % (filesystem, )] = 0
                logger.info("Stored inodes information for FS %s" %
                            (filesystem))

                cfs = process_inodes_information(filesets[filesystem],
                                                 quota[filesystem]['FILESET'],
                                                 threshold=0.9)
                logger.info("Processed inodes information for filesystem %s" %
                            (filesystem, ))
                if cfs:
                    critical_filesets[filesystem] = cfs
                    logger.info(
                        "Filesystem %s has at least %d filesets reaching the limit"
                        % (filesystem, len(cfs)))

            except Exception:
                stats["%s_inodes_log" % (filesystem, )] = 1
                logger.exception(
                    "Failed storing inodes information for FS %s" %
                    (filesystem))

        logger.info("Critical filesets: %s" % (critical_filesets, ))

        if critical_filesets:
            mail_admins(critical_filesets, opts.options.dry_run)

    except Exception:
        logger.exception("Failure obtaining GPFS inodes")
        opts.critical("Failure to obtain GPFS inodes information")

    opts.epilogue("Logged GPFS inodes", stats)
Code example #16
File: dquota.py  Project: itkovian/master-scripts
def main():
    """Main script"""

    options = {
        'nagios': ('print out nagios information', None, 'store_true', False, 'n'),
        'nagios-check-filename': ('filename of where the nagios check data is stored', str, 'store', NAGIOS_CHECK_FILENAME),
        'nagios-check-interval-threshold': ('threshold of nagios checks timing out', None, 'store', NAGIOS_CHECK_INTERVAL_THRESHOLD),
        'storage': ('the VSC filesystems that are checked by this script', None, 'extend', []),
        'dry-run': ('do not make any updates whatsoever', None, 'store_true', False),
    }
    opts = simple_option(options)

    logger.info('started GPFS quota check run.')

    nagios_reporter = NagiosReporter(NAGIOS_HEADER,
                                     opts.options.nagios_check_filename,
                                     opts.options.nagios_check_interval_threshold)

    if opts.options.nagios:
        nagios_reporter.report_and_exit()
        sys.exit(0)  # not reached

    lockfile = TimestampedPidLockfile(QUOTA_CHECK_LOCK_FILE)
    lock_or_bork(lockfile, nagios_reporter)

    try:
        user_id_map = map_uids_to_names() # is this really necessary?
        LdapQuery(VscConfiguration())
        gpfs = GpfsOperations()
        filesystems = gpfs.list_filesystems().keys()
        logger.debug("Found the following GPFS filesystems: %s" % (filesystems))

        filesets = gpfs.list_filesets()
        logger.debug("Found the following GPFS filesets: %s" % (filesets))

        quota = gpfs.list_quota()

        for storage in opts.options.storage:

            logger.info("Processing quota for storage %s" % (storage))
            filesystem = opts.configfile_parser.get(storage, 'filesystem')

            if filesystem not in filesystems:
                logger.error("Non-existant filesystem %s" % (filesystem))
                continue

            if filesystem not in quota.keys():
                logger.error("No quota defined for storage %s [%s]" % (storage, filesystem))
                continue

            quota_storage_map = get_mmrepquota_maps(quota[filesystem], storage, filesystem, filesets)

            exceeding_filesets = process_fileset_quota(gpfs, storage, filesystem, quota_storage_map['FILESET'])
            exceeding_users = process_user_quota(gpfs, storage, filesystem, quota_storage_map['USR'], user_id_map)

            logger.warning("storage %s found %d filesets that are exceeding their quota: %s" % (storage,
                                                                                                len(exceeding_filesets),
                                                                                                exceeding_filesets))
            logger.warning("storage %s found %d users who are exceeding their quota: %s" % (storage,
                                                                                            len(exceeding_users),
                                                                                            exceeding_users))

            notify_exceeding_filesets(gpfs=gpfs,
                                      storage=storage,
                                      filesystem=filesystem,
                                      exceeding_items=exceeding_filesets,
                                      dry_run=opts.options.dry_run)
            notify_exceeding_users(gpfs=gpfs,
                                   storage=storage,
                                   filesystem=filesystem,
                                   exceeding_items=exceeding_users,
                                   dry_run=opts.options.dry_run)

        sys.exit(1)

    except Exception as err:
        logger.exception("critical exception caught: %s" % (err))
        if not opts.options.dry_run:
            nagios_reporter.cache(NAGIOS_EXIT_CRITICAL, NagiosResult("CRITICAL script failed - %s" % (err,)))
            lockfile.release()
        sys.exit(1)
Code example #17
File: dcheckjob.py  Project: smoors/vsc-jobs
def main():
    # Collect all info

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g. for each cluster, will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'hosts':
        ('the hosts/clusters that should be contacted for job information',
         None, 'extend', []),
        'location': ('the location for storing the pickle file: delcatty, muk',
                     str, 'store', 'delcatty'),
        'access_token':
        ('the token that will allow authentication against the account page',
         None, 'store', None),
        'account_page_url': ('', None, 'store', None),
        'target_master':
        ('the master used to execute showq commands', None, 'store', None),
        'target_user':
        ('the user for ssh to the target master', None, 'store', None),
    }

    opts = ExtendedSimpleOption(options)

    try:
        rest_client = AccountpageClient(token=opts.options.access_token)

        gpfs = GpfsOperations()
        storage = VscStorage()
        storage_name = cluster_user_pickle_store_map[opts.options.location]
        login_mount_point = storage[storage_name].login_mount_point
        gpfs_mount_point = storage[storage_name].gpfs_mount_point

        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            checkjob_path = opts.configfile_parser.get(host, "checkjob_path")
            clusters[host] = {'master': master, 'path': checkjob_path}

        checkjob = SshCheckjob(opts.options.target_master,
                               opts.options.target_user,
                               clusters,
                               cache_pickle=True,
                               dry_run=opts.options.dry_run)

        (job_information, _, _) = checkjob.get_moab_command_information()

        active_users = job_information.keys()

        logger.debug("Active users: %s" % (active_users))
        logger.debug("Checkjob information: %s" % (job_information))

        nagios_user_count = 0
        nagios_no_store = 0

        stats = {}

        for user in active_users:
            path = get_pickle_path(opts.options.location, user, rest_client)
            try:
                user_queue_information = CheckjobInfo(
                    {user: job_information[user]})
                store_on_gpfs(user, path, "checkjob", user_queue_information,
                              gpfs, login_mount_point, gpfs_mount_point,
                              ".checkjob.json.gz", opts.options.dry_run)
                nagios_user_count += 1
            except Exception:
                logger.exception("Could not store cache file for user %s" %
                                 (user))
                nagios_no_store += 1
        stats["store_users"] = nagios_user_count
        stats["store_fail"] = nagios_no_store
        stats["store_fail_critical"] = STORE_LIMIT_CRITICAL
    except Exception as err:
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
Code example #18
File: vo.py  Project: lexming/vsc-administration
class VscTier2AccountpageVo(VscAccountPageVo):
    """Class representing a VO in the VSC.

    A VO is a special kind of group, identified mainly by its name.
    """

    def __init__(self, vo_id, storage=None, rest_client=None, host_institute=GENT):
        """Initialise"""
        super(VscTier2AccountpageVo, self).__init__(vo_id, rest_client)

        self.vo_id = vo_id
        self.vsc = VSC()
        self.host_institute = host_institute

        if not storage:
            self.storage = VscStorage()
        else:
            self.storage = storage

        self.gpfs = GpfsOperations()
        self.posix = PosixOperations()

        self.dry_run = False

        self._vo_data_quota_cache = None
        self._vo_data_shared_quota_cache = None
        self._vo_scratch_quota_cache = None
        self._institute_quota_cache = None

        self._sharing_group_cache = None

    @property
    def _institute_quota(self):
        if not self._institute_quota_cache:
            all_quota = [mkVscVoSizeQuota(q) for q in
                         whenHTTPErrorRaise(self.rest_client.vo[self.vo.vsc_id].quota.get,
                                            "Could not get quotata from accountpage for VO %s" % self.vo.vsc_id)[1]]
            self._institute_quota_cache = [q for q in all_quota if q.storage['institute'] == self.host_institute]
        return self._institute_quota_cache

    def _get_institute_data_quota(self):
        return [q for q in self._institute_quota if q.storage['storage_type'] == DATA_KEY]

    def _get_institute_non_shared_data_quota(self):
        return [q.hard for q in self._get_institute_data_quota()
                if not q.storage['name'].endswith(STORAGE_SHARED_SUFFIX)]

    def _get_institute_shared_data_quota(self):
        return [q.hard for q in self._get_institute_data_quota()
                if q.storage['name'].endswith(STORAGE_SHARED_SUFFIX)]

    @property
    def vo_data_quota(self):
        if not self._vo_data_quota_cache:
            self._vo_data_quota_cache = self._get_institute_non_shared_data_quota()
            if not self._vo_data_quota_cache:
                self._vo_data_quota_cache = [self.storage[VSC_DATA].quota_vo]

        return self._vo_data_quota_cache[0]  # there can be only one

    @property
    def vo_data_shared_quota(self):
        if not self._vo_data_shared_quota_cache:
            try:
                self._vo_data_shared_quota_cache = self._get_institute_shared_data_quota()[0]
            except IndexError:
                return None
        return self._vo_data_shared_quota_cache

    @property
    def vo_scratch_quota(self):
        if not self._vo_scratch_quota_cache:
            self._vo_scratch_quota_cache = [q for q in self._institute_quota
                                            if q.storage['storage_type'] == SCRATCH_KEY]

        return self._vo_scratch_quota_cache

    @property
    def sharing_group(self):
        if not self.data_sharing:
            return None

        if not self._sharing_group_cache:
            group_name = self.vo.vsc_id.replace(VO_PREFIX_BY_INSTITUTE[self.vo.institute['name']],
                                                VO_SHARED_PREFIX_BY_INSTITUTE[self.vo.institute['name']])
            self._sharing_group_cache = mkVscAutogroup(
                whenHTTPErrorRaise(self.rest_client.autogroup[group_name].get,
                                   "Could not get autogroup %s details" % group_name)[1])

        return self._sharing_group_cache

    @property
    def data_sharing(self):
        return self.vo_data_shared_quota is not None

    def members(self):
        """Return a list with all the VO members in it."""
        return self.vo.members

    def _get_path(self, storage, mount_point="gpfs"):
        """Get the path for the (if any) user directory on the given storage."""

        (path, _) = self.storage.path_templates[self.host_institute][storage]['vo'](self.vo.vsc_id)
        if mount_point == "login":
            mount_path = self.storage[self.host_institute][storage].login_mount_point
        elif mount_point == "gpfs":
            mount_path = self.storage[self.host_institute][storage].gpfs_mount_point
        else:
            logging.error("mount_point (%s)is not login or gpfs", mount_point)
            raise Exception()

        return os.path.join(mount_path, path)

    def _data_path(self, mount_point="gpfs"):
        """Return the path to the VO data fileset on GPFS"""
        return self._get_path(VSC_DATA, mount_point)

    def _data_shared_path(self, mount_point="gpfs"):
        """Return the path the VO shared data fileset on GPFS"""
        return self._get_path(VSC_DATA_SHARED, mount_point)

    def _scratch_path(self, storage, mount_point="gpfs"):
        """Return the path to the VO scratch fileset on GPFS.

        @type storage: string
        @param storage: name of the storage we are looking at.
        """
        return self._get_path(storage, mount_point)

    def _create_fileset(self, filesystem_name, path, parent_fileset=None, fileset_name=None, group_owner_id=None):
        """Create a fileset for the VO on the data filesystem.

        - creates the fileset if it does not already exist
        - sets ownership to the first (active) VO moderator, or to nobody if there is no moderator
        - sets group ownership to the supplied value (group_owner_id) or if that is missing to the
          vsc_id of the VO owning the fileset

        The parent_fileset is used to support older (< 3.5.x) GPFS setups still present in our system
        """
        self.gpfs.list_filesets()
        if not fileset_name:
            fileset_name = self.vo.vsc_id

        if group_owner_id:
            fileset_group_owner_id = group_owner_id
        else:
            fileset_group_owner_id = self.vo.vsc_id_number

        if not self.gpfs.get_fileset_info(filesystem_name, fileset_name):
            logging.info("Creating new fileset on %s with name %s and path %s",
                         filesystem_name, fileset_name, path)
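            # make sure the parent directory of the new fileset path exists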
            base_dir_hierarchy = os.path.dirname(path)
            self.gpfs.make_dir(base_dir_hierarchy)

            # HACK to support versions older than 3.5 in our setup
            if parent_fileset is None:
                self.gpfs.make_fileset(path, fileset_name)
            else:
                self.gpfs.make_fileset(path, fileset_name, parent_fileset)
        else:
            logging.info("Fileset %s already exists for VO %s ... not creating again.",
                         fileset_name, self.vo.vsc_id)

        self.gpfs.chmod(0o770, path)

        try:
            moderator = mkVscAccount(self.rest_client.account[self.vo.moderators[0]].get()[1])
        except HTTPError:
            logging.exception("Cannot obtain moderator information from account page, setting ownership to nobody")
            self.gpfs.chown(pwd.getpwnam('nobody').pw_uid, fileset_group_owner_id, path)
        except IndexError:
            logging.error("There is no moderator available for VO %s", self.vo.vsc_id)
            self.gpfs.chown(pwd.getpwnam('nobody').pw_uid, fileset_group_owner_id, path)
        else:
            self.gpfs.chown(moderator.vsc_id_number, fileset_group_owner_id, path)

    def create_data_fileset(self):
        """Create the VO's directory on the HPC data filesystem. Always set the quota."""
        path = self._data_path()
        try:
            fs = self.storage[self.host_institute][VSC_DATA].filesystem
        except AttributeError:
            logging.exception("Trying to access non-existent attribute 'filesystem' in the data storage instance")
            return
        except KeyError:
            logging.exception("Trying to access non-existent field %s in the data storage dictionary", VSC_DATA)
            return
        self._create_fileset(fs, path)

    def create_data_shared_fileset(self):
        """Create a VO directory for sharing data on the HPC data filesystem. Always set the quota."""
        path = self._data_shared_path()
        msg = "Trying to access non-existent"
        try:
            fs = self.storage[self.host_institute][VSC_DATA_SHARED].filesystem
        except AttributeError:
            logging.exception("%s attribute 'filesystem' in the shared data storage instance", msg)
            return
        except KeyError:
            logging.exception("%s field %s in the shared data storage dictionary", msg, VSC_DATA_SHARED)
            return
        self._create_fileset(fs, path,
                             fileset_name=self.sharing_group.vsc_id,
                             group_owner_id=self.sharing_group.vsc_id_number)

    def create_scratch_fileset(self, storage_name):
        """Create the VO's directory on the HPC data filesystem. Always set the quota."""
        msg = "Trying to access non-existent"
        try:
            path = self._scratch_path(storage_name)
            if self.storage[self.host_institute][storage_name].version >= (3, 5, 0, 0):
                self._create_fileset(self.storage[self.host_institute][storage_name].filesystem, path)
            else:
                self._create_fileset(self.storage[self.host_institute][storage_name].filesystem, path, 'root')
        except AttributeError:
            logging.exception("%s attribute 'filesystem' in the scratch storage instance", msg)
        except KeyError:
            logging.exception("%s field %s in the scratch storage dictionary", msg, storage_name)

    def _create_vo_dir(self, path):
        """Create a user owned directory on the GPFS."""
        self.gpfs.make_dir(path)

    def _set_quota(self, storage_name, path, quota, fileset_name=None):
        """Set FILESET quota on the FS for the VO fileset.
        @type quota: int
        @param quota: soft quota limit expressed in KiB
        """
        if not fileset_name:
            fileset_name = self.vo.vsc_id
        try:
            # the backend provides the quota in KiB, GPFS wants bytes; replicated
            # blocks also count against the quota, hence the replication factor
            hard = quota * 1024 * self.storage[self.host_institute][storage_name].data_replication_factor
            soft = int(hard * self.vsc.quota_soft_fraction)

            self.gpfs.set_fileset_quota(soft, path, fileset_name, hard)
            self.gpfs.set_fileset_grace(path, self.vsc.vo_storage_grace_time)  # 7 days
        except GpfsOperationError:
            logging.exception("Unable to set quota on path %s", path)
            raise

    def set_data_quota(self):
        """Set FILESET quota on the data FS for the VO fileset."""
        if self.vo_data_quota:
            self._set_quota(VSC_DATA, self._data_path(), int(self.vo_data_quota))
        else:
            # fall back to a minimal 16 MiB quota (16 * 1024 KiB)
            self._set_quota(VSC_DATA, self._data_path(), 16 * 1024)

    def set_data_shared_quota(self):
        """Set FILESET quota on the data FS for the VO fileset."""
        if self.vo_data_shared_quota:
            self._set_quota(
                VSC_DATA_SHARED,
                self._data_shared_path(),
                int(self.vo_data_shared_quota),
                fileset_name=self.vo.vsc_id.replace(
                    VO_PREFIX_BY_INSTITUTE[self.vo.institute["name"]],
                    VO_SHARED_PREFIX_BY_INSTITUTE[self.vo.institute["name"]],
                ),
            )

    def set_scratch_quota(self, storage_name):
        """Set FILESET quota on the scratch FS for the VO fileset."""
        quota = [q for q in self.vo_scratch_quota if q.storage['name'] in (storage_name,)]

        if not quota:
            logging.error("No VO %s scratch quota information available for %s", self.vo.vsc_id, storage_name)
            logging.info("Setting default VO %s scratch quota on storage %s to %d",
                         self.vo.vsc_id, storage_name, self.storage[storage_name].quota_vo)
            self._set_quota(storage_name, self._scratch_path(storage_name), self.storage[storage_name].quota_vo)
            return
        elif len(quota) > 1:
            logging.error("Cannot set scratch quota for %s with multiple quota instances %s",
                          storage_name, quota)
            raise ValueError("Multiple scratch quota instances for %s" % storage_name)

        logging.info("Setting VO %s quota on storage %s to %d", self.vo.vsc_id, storage_name, quota[0].hard)
        self._set_quota(storage_name, self._scratch_path(storage_name), quota[0].hard)

    def _set_member_quota(self, storage_name, path, member, quota):
        """Set USER quota on the FS for the VO fileset

        @type member: VscTier2AccountpageUser
        @type quota: integer (hard value)
        """
        try:
            hard = quota * 1024 * self.storage[self.host_institute][storage_name].data_replication_factor
            soft = int(hard * self.vsc.quota_soft_fraction)

            self.gpfs.set_user_quota(soft=soft, user=int(member.account.vsc_id_number), obj=path, hard=hard)
        except GpfsOperationError:
            logging.exception("Unable to set USR quota for member %s on path %s", member.account.vsc_id, path)
            raise

    def set_member_data_quota(self, member):
        """Set the quota on the data FS for the member in the VO fileset.

        @type member: VscTier2AccountPageUser instance

        The user can have up to half of the VO quota.
        FIXME: This should probably be some variable in a config setting instance
        """
        if not self.vo_data_quota:
            logging.warning("Not setting VO %s member %s data quota: no VO data quota info available",
                            self.vo.vsc_id, member.account.vsc_id)
            return

        if self.vo.vsc_id in DEFAULT_VOS_ALL:
            logging.warning("Not setting VO %s member %s data quota: No VO member quota for this VO",
                            member.account.vsc_id, self.vo.vsc_id)
            return

        if member.vo_data_quota:
            # users having belonged to multiple VOs have multiple quota on VSC_DATA, so we
            # only need to deploy the quota for the VO the user currently belongs to.
            quota = [q for q in member.vo_data_quota
                     if q.fileset == self.vo.vsc_id and not q.storage['name'].endswith(STORAGE_SHARED_SUFFIX)]
            if len(quota) > 1:
                logging.error("Cannot set data quota for member %s with multiple quota instances %s",
                              member, quota)
                raise ValueError("Multiple data quota instances for member %s" % member)
            elif quota:
                logging.info("Setting the data quota for VO %s member %s to %d KiB",
                             self.vo.vsc_id, member.account.vsc_id, quota[0].hard)
                self._set_member_quota(VSC_DATA, self._data_path(), member, quota[0].hard)
            else:
                logging.error("No VO %s data quota for member %s after filtering (all: %s)",
                              self.vo.vsc_id, member.account.vsc_id, member.vo_data_quota)
        else:
            logging.error("No VO %s data quota set for member %s", self.vo.vsc_id, member.account.vsc_id)

    def set_member_scratch_quota(self, storage_name, member):
        """Set the quota on the scratch FS for the member in the VO fileset.

        @type member: VscTier2AccountpageUser instance

        The user can have up to half of the VO quota.
        FIXME: This should probably be some variable in a config setting instance
        """
        if not self.vo_scratch_quota:
            logging.warning("Not setting VO %s member %s scratch quota: no VO quota info available",
                            self.vo.vsc_id, member.account.vsc_id)
            return

        if self.vo.vsc_id in DEFAULT_VOS_ALL:
            logging.warning("Not setting VO %s member %s scratch quota: No VO member quota for this VO",
                            member.account.vsc_id, self.vo.vsc_id)
            return

        if member.vo_scratch_quota:
            quota = [q for q in member.vo_scratch_quota
                     if q.storage['name'] in (storage_name,) and q.fileset in (self.vo_id,)]
            if quota:
                logging.info("Setting the scratch quota for VO %s member %s to %d GiB on %s",
                             self.vo.vsc_id, member.account.vsc_id, quota[0].hard // 1024 // 1024, storage_name)
                self._set_member_quota(storage_name, self._scratch_path(storage_name), member, quota[0].hard)
            else:
                logging.error("No VO %s scratch quota for member %s on %s after filter (all %s)",
                              self.vo.vsc_id, member.account.vsc_id, storage_name, member.vo_scratch_quota)
        else:
            logging.error("No VO %s scratch quota set for member %s on %s",
                          self.vo.vsc_id, member.account.vsc_id, storage_name)

    def _create_member_dir(self, member, target):
        """Create a member-owned directory in the VO fileset."""
        self.gpfs.create_stat_directory(
            target,
            0o700,
            int(member.account.vsc_id_number),
            int(member.usergroup.vsc_id_number),
            # we should not override permissions on an existing dir where users may have changed them
            override_permissions=False)

    def create_member_data_dir(self, member):
        """Create a directory on data in the VO fileset that is owned
        by the member with name $VSC_DATA_VO/<vscid>."""
        target = os.path.join(self._data_path(), member.user_id)
        self._create_member_dir(member, target)

    def create_member_scratch_dir(self, storage_name, member):
        """Create a directory on scratch in the VO fileset that is owned
        by the member with name $VSC_SCRATCH_VO/<vscid>."""
        target = os.path.join(self._scratch_path(storage_name), member.user_id)
        self._create_member_dir(member, target)

    def __setattr__(self, name, value):
        """Override the setting of an attribute:

        - dry_run: set this here and in the gpfs and posix instance fields.
        - otherwise, call super's __setattr__()
        """

        if name == 'dry_run':
            self.gpfs.dry_run = value
            self.posix.dry_run = value

        super(VscTier2AccountpageVo, self).__setattr__(name, value)
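
A class like this is typically driven from a synchronisation script. A minimal usage sketch, assuming the constructor accepts the VO id, an accountpage REST client and the host institute (as the attributes used above suggest), with "gvo00002" a purely hypothetical VO id:

client = AccountpageClient(token="<OAuth2 token>")
vo = VscTier2AccountpageVo("gvo00002", rest_client=client, host_institute=GENT)
vo.dry_run = True  # propagated to the gpfs and posix instances by __setattr__

vo.create_data_fileset()
vo.set_data_quota()
if vo.data_sharing:
    vo.create_data_shared_fileset()
    vo.set_data_shared_quota()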
Code Example #19
def main():
    """Main script"""

    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'storage': ('the VSC filesystems that are checked by this script', None, 'extend', []),
        'write-cache': ('Write the data into the cache files in the FS', None, 'store_true', False),
        'account_page_url': ('Base URL of the account page', None, 'store',
                             'https://account.vscentrum.be/django'),
        'access_token': ('OAuth2 token to access the account page REST API', None, 'store', None),
        'host_institute': ('Name of the institute where this script is being run', str, 'store', GENT),
    }
    opts = ExtendedSimpleOption(options)
    logger = opts.log

    try:
        client = AccountpageClient(token=opts.options.access_token)

        user_id_map = map_uids_to_names()  # is this really necessary?
        gpfs = GpfsOperations()
        storage = VscStorage()

        target_filesystems = [storage[s].filesystem for s in opts.options.storage]

        filesystems = gpfs.list_filesystems(device=target_filesystems).keys()
        logger.debug("Found the following GPFS filesystems: %s", filesystems)

        filesets = gpfs.list_filesets(devices=target_filesystems)
        logger.debug("Found the following GPFS filesets: %s", filesets)

        quota = gpfs.list_quota(devices=target_filesystems)
        exceeding_filesets = {}
        exceeding_users = {}
        stats = {}
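        # per-storage counters and thresholds, handed to opts.epilogue at the
        # end of the run so they end up in the monitoring (Nagios) report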

        for storage_name in opts.options.storage:

            logger.info("Processing quota for storage_name %s" %
                        (storage_name))
            filesystem = storage[storage_name].filesystem
            replication_factor = storage[storage_name].data_replication_factor

            if filesystem not in filesystems:
                logger.error("Non-existent filesystem %s", filesystem)
                continue

            if filesystem not in quota:
                logger.error("No quota defined for storage_name %s [%s]", storage_name, filesystem)
                continue

            quota_storage_map = get_mmrepquota_maps(
                quota[filesystem],
                storage_name,
                filesystem,
                filesets,
                replication_factor,
            )

            exceeding_filesets[storage_name] = process_fileset_quota(
                storage,
                gpfs,
                storage_name,
                filesystem,
                quota_storage_map['FILESET'],
                client,
                dry_run=opts.options.dry_run,
                institute=opts.options.host_institute)

            exceeding_users[storage_name] = process_user_quota(
                storage,
                gpfs,
                storage_name,
                None,
                quota_storage_map['USR'],
                user_id_map,
                client,
                dry_run=opts.options.dry_run,
                institute=opts.options.host_institute)

            stats["%s_fileset_critical" %
                  (storage_name, )] = QUOTA_FILESETS_CRITICAL
            if exceeding_filesets[storage_name]:
                stats["%s_fileset" % (storage_name, )] = 1
                logger.warning(
                    "storage_name %s found %d filesets that are exceeding their quota",
                    storage_name, len(exceeding_filesets))
                for (e_fileset, e_quota) in exceeding_filesets[storage_name]:
                    logger.warning("%s has quota %s" %
                                   (e_fileset, str(e_quota)))
            else:
                stats["%s_fileset" % (storage_name, )] = 0
                logger.debug(
                    "storage_name %s found no filesets that are exceeding their quota"
                    % storage_name)

            stats["%s_users_warning" % (storage_name, )] = QUOTA_USERS_WARNING
            stats["%s_users_critical" %
                  (storage_name, )] = QUOTA_USERS_CRITICAL
            if exceeding_users[storage_name]:
                stats["%s_users" % (storage_name, )] = len(
                    exceeding_users[storage_name])
                logger.warning(
                    "storage_name %s found %d users who are exceeding their quota"
                    % (storage_name, len(exceeding_users[storage_name])))
                for (e_user_id, e_quota) in exceeding_users[storage_name]:
                    logger.warning("%s has quota %s" %
                                   (e_user_id, str(e_quota)))
            else:
                stats["%s_users" % (storage_name, )] = 0
                logger.debug(
                    "storage_name %s found no users who are exceeding their quota"
                    % storage_name)

    except Exception as err:
        logger.exception("critical exception caught: %s", err)
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)

    opts.epilogue("quota check completed", stats)