Example #1
def notify_exceeding_items(gpfs, storage, filesystem, exceeding_items, target, dry_run=False):
    """Send out notification to the fileset owners.

    - if the fileset belongs to a VO: the VO moderator
    - if the fileset belongs to a project: the project moderator
    - if the fileset belongs to a user: the user

    The information is cached. The mail is sent in the following cases:
        - the quota excess is new
        - the quota excess was recorded more than 7 days ago and is still in the cache. In this case, the cache
          entry is refreshed to avoid repeatedly sending outdated mails.
    """

    cache_path = os.path.join(gpfs.list_filesystems()[filesystem]['defaultMountPoint'], ".quota_%s_cache.json.gz" % (target))
    cache = FileCache(cache_path, True)  # we retain the old data

    logger.info("Processing %d exceeding items" % (len(exceeding_items)))

    for (item, quota) in exceeding_items:
        updated = cache.update(item, quota, QUOTA_NOTIFICATION_CACHE_THRESHOLD)
        logger.info("Storage %s: cache entry for %s was updated: %s" % (storage, item, updated))
        if updated:
            notify(storage, item, quota, dry_run)

    if not dry_run:
        cache.close()
    else:
        logger.info("Dry run: not saving the updated cache")
Example #2
def print_vo_quota(opts, storage, vos, now):
    """
    Print the quota for the VO fileset.
    """
    print "\nVO quota:"
    for storage_name in [s for s in opts.options.storage if s != 'VSC_HOME']:  # No VOs on VSC_HOME atm

        mount_point = storage[storage_name].login_mount_point
        path_template = storage.path_templates[storage_name]['vo']
        path = os.path.join(mount_point, path_template[0], path_template[1](vos[0]), ".quota_fileset.json.gz")

        cache = FileCache(path, True)
        try:
            (timestamp, quota) = cache.load('quota')
        except TypeError:
            logger.debug("Cannot load data from %s" % (path,))
            print "%s: WARNING: No VO quota information found" % (storage_name,)
            continue

        if now - timestamp > opts.options.threshold:
            print "%s: WARNING: no recent VO quota information (age of data is %d minutes)" % (storage_name,
                                                                                                (now-timestamp)/60)
        else:
            for (fileset, qi) in quota.quota_map.items():
                pp = quota_pretty_print(storage_name, fileset, qi, opts.options.fileset_prefixes)
                if pp:
                    print pp
Example #3
def print_user_quota(opts, storage, user_name, now):
    """
    Print the quota for the user, i.e., USR quota in all filesets the user has access to.
    """
    print "User quota:"
    for storage_name in opts.options.storage:

        mount_point = storage[storage_name].login_mount_point
        path_template = storage.path_templates[storage_name]['user']
        path = os.path.join(mount_point, path_template[0], path_template[1](user_name), ".quota_user.json.gz")

        cache = FileCache(path, True)
        try:
            (timestamp, quota) = cache.load('quota')
        except TypeError:
            logger.debug("Cannot load data from %s" % (path,))
            print "%s: WARNING: No quota information found" % (storage_name,)
            continue

        if now - timestamp > opts.options.threshold:
            print "%s: WARNING: no recent quota information (age of data is %d minutes)" % (storage_name,
                                                                                            (now-timestamp)/60)
        else:
            for (fileset, qi) in quota.quota_map.items():
                pp = quota_pretty_print(storage_name, fileset, qi, opts.options.fileset_prefixes)
                if pp:
                    print pp
Example #4
    def report_and_exit(self):
        """Unzips the cache file and reads the JSON data back in, prints the data and exits accordingly.

        If the cache data is too old (now - cache timestamp > self.threshold), an unknown exit is produced.
        """
        try:
            nagios_cache = FileCache(self.filename, True)
        except:
            self.log.critical("Error opening file %s for reading" %
                              (self.filename))
            unknown_exit("%s nagios gzipped JSON file unavailable (%s)" %
                         (self.header, self.filename))

        (timestamp, ((nagios_exit_code, nagios_exit_string),
                     nagios_message)) = nagios_cache.load('nagios')
        nagios_cache.close()

        if self.threshold <= 0 or time.time() - timestamp < self.threshold:
            self.log.info("Nagios check cache file %s contents delivered: %s" %
                          (self.filename, nagios_message))
            print "%s %s" % (nagios_exit_string, nagios_message)
            sys.exit(nagios_exit_code)
        else:
            unknown_exit("%s gzipped JSON file too old (timestamp = %s)" %
                         (self.header, time.ctime(timestamp)))
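report_and_exit() is the read half of a write/read pair: a data-gathering script stores its verdict with the cache() method shown in later examples, and the actual Nagios check replays it. A minimal sketch of that pairing; it assumes a NagiosReporter-like class in vsc.utils.nagios taking the header, filename, threshold and nagios_username these methods use, and the path, threshold and message are placeholders:

# sketch only: assumes vsc.utils.nagios exposes NagiosReporter and the NAGIOS_EXIT_* constants
from vsc.utils.nagios import NagiosReporter, NAGIOS_EXIT_OK

reporter = NagiosReporter("quota_check", "/var/cache/quota_check.nagios.json.gz",
                          threshold=30 * 60, nagios_username="nagios")

# the cron-driven script stores its result ...
reporter.cache(NAGIOS_EXIT_OK, "quota caches refreshed")

# ... and the Nagios plugin later prints the cached message and exits with the cached
# code, or produces an unknown exit when the cache is older than the threshold
reporter.report_and_exit()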
Example #5
def main():

    options = {
        'storage': ('the VSC filesystems that are checked by this script', None, 'extend', []),
        'threshold': ('the allowed time difference between the cached quota information and the time of running', None, 'store',
                      DEFAULT_ALLOWED_TIME_THRESHOLD),
    }
    opts = simple_option(options, config_files='/etc/quota_information.conf')

    storage = VscStorage()
    user_name = getpwuid(os.getuid())[0]
    now = time.time()

    for storage_name in opts.options.storage:

        mount_point = storage[storage_name].login_mount_point
        path_template = storage.path_templates[storage_name]['user']
        path = os.path.join(mount_point, path_template[0], path_template[1](user_name), ".quota_user.json.gz")

        cache = FileCache(path)
        (timestamp, quota) = cache.load('quota')

        if now - timestamp > opts.options.threshold:
            print "%s: WARNING: no recent quota information (age of data is %d minutes)" % (storage_name,
                                                                                            (now - timestamp) / 60)
        else:
            for (fileset, qi) in quota.quota_map.items():
                # the detailed "used X MiB (Y%) quota Z MiB" rendering is left to the
                # quota information's string representation here
                print "%s: fileset %s: %s" % (storage_name, fileset, qi)


if __name__ == '__main__':
    main()
Example #6
    def cache(self, nagios_exit, nagios_message):
        """Store the result in the cache file with a timestamp.

        @type nagios_exit: one of NAGIOS_EXIT_OK, NAGIOS_EXIT_WARNING, NAGIOS_EXIT_CRITICAL or NAGIOS_EXIT_UNKNOWN
        @type nagios_message: string

        @param nagios_exit: a valid nagios exit code.
        @param nagios_message: the message to print out when the actual check runs.
        """
        try:
            nagios_cache = FileCache(self.filename)
            nagios_cache.update(0, (nagios_exit, nagios_message), 0)  # always update
            nagios_cache.close()
            self.log.info("Wrote nagios check cache file %s at about %s" % (self.filename, time.ctime(time.time())))
        except:
            # raising an error is ok, since we usually do this as the very last thing in the script
            self.log.raiseException("Cannot save to the nagios pickled file (%s)" % (self.filename))

        try:
            p = pwd.getpwnam(self.nagios_username)
            os.chmod(self.filename, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
            os.chown(self.filename, p.pw_uid, p.pw_gid)
        except:
            self.log.raiseException("Cannot chown the nagios check file %s to the nagios user" % (self.filename))

        return True
Example #7
def store_on_gpfs(user_name, path, key, information, gpfs, login_mount_point, gpfs_mount_point, filename,
                  dry_run=False):
    """
    Store the given information in a cache file that resides in a user's directory.

    @type user_name: string
    @type path: string, representing a directory
    @type key: string, name for the kind of information we are going to store in the cache
    @type information: a recursive dict structure
    @type gpfs: GpfsOperations instance
    @type login_mount_point: path representing the mount point of the storage location on the login nodes
    @type gpfs_mount_point: path representing the mount point of the storage location when GPFS mounted
    @type dry_run: boolean
    """

    if user_name and user_name.startswith('vsc4'):
        logger.debug("Storing %s information for user %s" % (key, user_name,))
        logger.debug("information: %s" % (information,))
        logger.debug("path for storing information would be %s" % (path,))

        # FIXME: We need some better way to address this
        # Right now, we replace the nfs mount prefix which the symlink points to
        # with the gpfs mount point. this is a workaround until we resolve the
        # symlink problem once we take new default scratch into production
        if gpfs.is_symlink(path):
            target = os.path.realpath(path)
            logger.debug("path is a symlink, target is %s" % (target,))
            logger.debug("login_mount_point is %s" % (login_mount_point,))
            if target.startswith(login_mount_point):
                new_path = target.replace(login_mount_point, gpfs_mount_point, 1)
                logger.info("Found a symlinked path %s to the nfs mount point %s. Replaced with %s" %
                            (path, login_mount_point, gpfs_mount_point))
            else:
                logger.warning("Unable to store quota information for %s on %s; symlink cannot be resolved properly"
                               % (user_name, path))
                return  # new_path would be undefined below; nothing sensible to store
        else:
            new_path = path

        path_stat = os.stat(new_path)
        filename = os.path.join(new_path, filename)

        if dry_run:
            logger.info("Dry run: would update cache for at %s with %s" % (new_path, "%s" % (information,)))
            logger.info("Dry run: would chmod 640 %s" % (filename,))
            logger.info("Dry run: would chown %s to %s %s" % (filename, path_stat.st_uid, path_stat.st_gid))
        else:
            cache = FileCache(filename, False)  # data need not be retained
            cache.update(key=key, data=information, threshold=0)
            cache.close()

            gpfs.ignorerealpathmismatch = True
            gpfs.chmod(0o640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)  # owner and group of the containing directory
            gpfs.ignorerealpathmismatch = False

        logger.info("Stored user %s %s information at %s" % (user_name, key, filename))
Example #8
def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: string representing a timestamp in the proper LDAP time format

    """
    cache = FileCache(filename)
    (_, timestamp) = cache.load('timestamp')

    return timestamp
Example #9
    def test_save_and_load(self):
        """Check if the loaded data is the same as the saved data."""
        # test with random data
        data, threshold = get_rand_data()

        # create a tempfilename
        (handle, filename) = tempfile.mkstemp()
        os.unlink(filename)
        os.close(handle)
        cache = FileCache(filename)
        for (key, value) in data.items():
            cache.update(key, value, threshold)
        cache.close()

        now = time.time()
        new_cache = FileCache(filename)
        for key in data.keys():
            info = new_cache.load(key)
            self.assertTrue(info is not None)
            (ts, value) = info
            self.assertTrue(value == data[key])
            self.assertTrue(ts <= now)
        new_cache.close()

        os.unlink(filename)
Example #10
def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """

    if timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp = timestamp.replace(tzinfo=Local)

    cache = FileCache(filename)
    cache.update(0, timestamp, 0)
    cache.close()
Example #11
    def test_write_donefile(self):
        """Test the writing of the values to a cache file when done."""
        donefile = "/tmp/done"
        values = {
            'completed': 50,
            'failed': 5,
            'unfinished': 0,
        }
        zkclient = RsyncSource('dummy', session='new', netcat=True, rsyncpath='/path/dummy', rsyncdepth=2,
                               done_file=donefile)
        zkclient.write_donefile(values)
        cache_file = FileCache(donefile)
        (timestamp, stats) = cache_file.load('stats')
        self.assertEqual(values, stats)
Example #12
def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: string representing a timestamp in the proper LDAP time format

    """
    cache = FileCache(filename)
    (_, timestamp) = cache.load(0)

    if not timestamp is None and timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp = timestamp.replace(tzinfo=Local)

    return timestamp
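This read_timestamp() variant loads key 0, matching the write_timestamp() of Example #10; the variants in Examples #8 and #19 use the string key 'timestamp' instead, so reader and writer must agree on the key. A minimal round trip, with a placeholder path:

import datetime

TIMESTAMP_FILE = "/tmp/sync.timestamp.json.gz"  # placeholder path

# store a naive datetime; write_timestamp() attaches the local timezone itself
write_timestamp(TIMESTAMP_FILE, datetime.datetime.now())

# read it back as a timezone-aware datetime (behaviour when nothing was stored yet differs per variant)
last_run = read_timestamp(TIMESTAMP_FILE)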
Example #13
def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: string representing a timestamp in the proper LDAP time format

    """
    cache = FileCache(filename)
    (_, timestamp) = cache.load(0)

    if not timestamp is None and timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp = timestamp.replace(tzinfo=Local)

    return timestamp
Example #14
def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: a timestamp in whatever format it was stored in (string LDAP timestamp, unix epoch, ...)
    """
    cache = FileCache(filename)
    try:
        (_, timestamp) = cache.load('timestamp')
    except TypeError:
        logging.warning('could not load timestamp from cache file %s',
                        filename)
        timestamp = None

    return timestamp
Example #15
    def test_write_donefile(self):
        """Test the writing of the values to a cache file when done."""
        donefile = "/tmp/done"
        values = {'completed': 50, 'failed': 5, 'unfinished': 0}
        zkclient = RsyncSource('dummy',
                               session='new',
                               netcat=True,
                               rsyncpath='/path/dummy',
                               rsyncdepth=2,
                               done_file=donefile)
        zkclient.write_donefile(values)
        cache_file = FileCache(donefile)
        (timestamp, stats) = cache_file.load('stats')
        self.assertEqual(values, stats)
Example #16
def read_cache(path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """
    try:
        cache = FileCache(path)
    except Exception:
        print "Failed to load checkjob information from %s" % (path,)

    res = cache.load('checkjob')
    if res[0] < (time.time() - MAXIMAL_AGE):
        print "The data in the checkjob cache may be outdated. Please contact your admin to look into this."

    return res[1]  # CheckjobInfo
Example #17
def read_cache(path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """
    try:
        cache = FileCache(path)
    except Exception:
        print "Failed to load checkjob information from %s" % (path, )

    res = cache.load('checkjob')
    if res[0] < (time.time() - MAXIMAL_AGE):
        print "The data in the checkjob cache may be outdated. Please contact your admin to look into this."

    return res[1]  # CheckjobInfo
Example #18
    def test_contents(self, data, threshold):
        """Check that the contents of the cache is what is expected prior to closing it."""
        # create a tempfilename
        (handle, filename) = tempfile.mkstemp(dir='/tmp')
        os.unlink(filename)
        cache = FileCache(filename)
        for (key, value) in data.items():
            cache.update(key, value, threshold)

        now = time.time()
        for key in data.keys():
            info = cache.load(key)
            self.assertFalse(info is None)
            (ts, value) = info
            self.assertTrue(value == data[key])
            self.assertTrue(ts <= now)
Example #19
def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """

    if isinstance(timestamp, datetime.datetime) and timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp_ = timestamp.replace(tzinfo=Local)
        (_, timestamp_) = convert_timestamp(timestamp)
    else:
        timestamp_ = timestamp

    cache = FileCache(filename)
    cache.update('timestamp', timestamp_, 0)
    cache.close()
Example #20
def process_user_quota(gpfs, storage, filesystem, quota_map, user_map):
    """Store the information in the user directories.
    """
    exceeding_users = []

    for (user_id, quota) in quota_map.items():

        user_name = user_map.get(int(user_id), None)

        logger.debug("Checking quota for user %s with ID %s" % (user_name, user_id))

        if user_name and user_name.startswith('vsc'):
            user = VscUser(user_name)
            logger.debug("User %s quota: %s" % (user, quota))

            path = user._get_path(storage)
            path_stat = os.stat(path)
            filename = os.path.join(path, ".quota_user.json.gz")

            cache = FileCache(filename)
            cache.update(key="quota", data=quota, threshold=0)
            cache.update(key="storage", data=storage, threshold=0)
            cache.close()

            gpfs.ignorerealpathmismatch = True
            gpfs.chmod(0640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)
            gpfs.ignorerealpathmismatch = False

            logger.info("Stored user %s quota for storage %s at %s" % (user_name, storage, filename))

            if quota.exceeds():
                exceeding_users.append((user, quota))

    return exceeding_users
Example #21
def process_fileset_quota(gpfs, storage, filesystem, quota_map):
    """Store the quota information in the filesets.
    """

    filesets = gpfs.list_filesets()
    exceeding_filesets = []

    logger.info("filesets = %s" % (filesets))

    for (fileset, quota) in quota_map.items():
        logger.debug("Fileset %s quota: %s" % (filesets[filesystem][fileset]['filesetName'], quota))

        path = filesets[filesystem][fileset]['path']
        filename = os.path.join(path, ".quota_fileset.json.gz")
        path_stat = os.stat(path)

        # TODO: This should somehow be some atomic operation.
        cache = FileCache(filename)
        cache.update(key="quota", data=quota, threshold=0)
        cache.update(key="storage", data=storage, threshold=0)
        cache.close()

        gpfs.chmod(0640, filename)
        gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)

        logger.info("Stored fileset %s quota for storage %s at %s" % (fileset, storage, filename))

        if quota.exceeds():
            exceeding_filesets.append((fileset, quota))

    return exceeding_filesets
Example #22
    def _load_pickle_cluster_file(self, host, raw=True):
        """Load the data from the pickled files.

        @type host: string

        @param host: cluster for which we load data

        @returns: representation of the showq output.
        """
        source = os.path.join(self._cache_pickle_directory(), self._cache_pickle_name(host))

        if raw:
            f = open(source, 'r')
            output = cPickle.load(f)
            f.close()
            return output
        else:
            cache = FileCache(source)
            return cache.load(self.cache_key)
Example #23
def read_cache(owner, showvo, running, idle, blocked, path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """

    try:
        cache = FileCache(path)
    except:
        print "Failed to load showq information from %s" % (path,)

    res = cache.load('showq')[1][0]
    user_map = cache.load('showq')[1][1]
    ## check for timeinfo
    if res['timeinfo'] < (time.time() - MAXIMAL_AGE):
        print "The data in the showq cache may be outdated. Please contact your admin to look into this."
    #    return (None, None)

    del res['timeinfo']

    logger.debug("Resulting cache data: %s" % (res))

    # Filter out data that is not needed
    if not showvo:
        for user in res.keys():
            if not user == owner:
                #del res[user]
                pass

    for user in res.keys():
        for host in res[user].keys():
            logger.debug("looking at host %s" % (host))
            states = res[user][host].keys()
            if not running:
                if 'Running' in states:
                    del res[user][host]['Running']
            if not idle:
                if 'Idle' in states:
                    del res[user][host]['Idle']
            if not blocked:
                for state in [x for x in states if not x in ('Running','Idle')]:
                    del res[user][host][state]

    return (res, user_map)
Example #24
    def test_corrupt_gz_cache(self):
        """Test to see if we can handle a corrupt cache file."""
        tempdir = tempfile.mkdtemp()
        # create a tempfilename
        (handle, filename) = tempfile.mkstemp(dir=tempdir)
        f = os.fdopen(handle, 'w')
        f.write('blabla;not gz')
        f.close()
        FileCache(filename)
        shutil.rmtree(tempdir)
Example #25
    def report_and_exit(self):
        """Unpickles the cache file, prints the data and exits accordingly.

        If the cache data is too old (now - cache timestamp > self.threshold), an unknown exit is produced.
        """
        try:
            nagios_cache = FileCache(self.filename, True)
        except:
            self.log.critical("Error opening file %s for reading" % (self.filename))
            unknown_exit("%s nagios pickled file unavailable (%s)" % (self.header, self.filename))

        (timestamp, ((nagios_exit_code, nagios_exit_string), nagios_message)) = nagios_cache.load(0)
        nagios_cache.close()

        if self.threshold < 0 or time.time() - timestamp < self.threshold:
            self.log.info("Nagios check cache file %s contents delivered: %s" % (self.filename, nagios_message))
            print "%s %s" % (nagios_exit_string, nagios_message)
            sys.exit(nagios_exit_code)
        else:
            unknown_exit("%s pickled file too old (timestamp = %s)" % (self.header, time.ctime(timestamp)))
Example #26
    def cache(self, nagios_exit, nagios_message):
        """Store the result in the cache file with a timestamp.

        @type nagios_exit: one of NAGIOS_EXIT_OK, NAGIOS_EXIT_WARNING, NAGIOS_EXIT_CRITICAL or NAGIOS_EXIT_UNKNOWN
        @type nagios_message: string

        @param nagios_exit: a valid nagios exit code.
        @param nagios_message: the message to print out when the actual check runs.
        """
        try:
            nagios_cache = FileCache(self.filename)
            nagios_cache.update(0, (nagios_exit, nagios_message),
                                0)  # always update
            nagios_cache.close()
            self.log.info("Wrote nagios check cache file %s at about %s" %
                          (self.filename, time.ctime(time.time())))
        except:
            # raising an error is ok, since we usually do this as the very last thing in the script
            self.log.raiseException(
                "Cannot save to the nagios pickled file (%s)" %
                (self.filename))

        try:
            p = pwd.getpwnam(self.nagios_username)
            os.chmod(self.filename,
                     stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
            os.chown(self.filename, p.pw_uid, p.pw_gid)
        except:
            self.log.raiseException(
                "Cannot chown the nagios check file %s to the nagios user" %
                (self.filename))

        return True
Example #27
    def cache(self, nagios_exit, nagios_message):
        """Store the result in the cache file with a timestamp.

        @type nagios_exit: one of NAGIOS_EXIT_OK, NAGIOS_EXIT_WARNING, NAGIOS_EXIT_CRITICAL or NAGIOS_EXIT_UNKNOWN
        @type nagios_message: string

        @param nagios_exit: a valid nagios exit code.
        @param nagios_message: the message to print out when the actual check runs.
        """
        try:
            nagios_cache = FileCache(self.filename)
            nagios_cache.update('nagios', (nagios_exit, nagios_message), 0)  # always update
            nagios_cache.close()
            self.log.info("Wrote nagios check cache file %s at about %s" % (self.filename, time.ctime(time.time())))
        except (IOError, OSError):
            # raising an error is ok, since we usually do this as the very last thing in the script
            self.log.raiseException("Cannot save to the nagios gzipped JSON file (%s)" % (self.filename))

        try:
            p = pwd.getpwnam(self.nagios_username)
            if self.world_readable:
                os.chmod(self.filename, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)
            else:
                os.chmod(self.filename, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)

            # only change owner/group when run as root
            if os.geteuid() == 0:
                os.chown(self.filename, p.pw_uid, p.pw_gid)
            else:
                self.log.warn("Not running as root: Cannot chown the nagios check file %s to %s" %
                              (self.filename, self.nagios_username))
        except OSError:
            self.log.raiseException("Cannot chown the nagios check file %s to the nagios user" % (self.filename))

        return True
Example #28
def process_user_quota(storage, gpfs, storage_name, filesystem, quota_map, user_map, dry_run=False):
    """Store the information in the user directories.
    """
    exceeding_users = []
    login_mount_point = storage[storage_name].login_mount_point
    gpfs_mount_point = storage[storage_name].gpfs_mount_point

    for (user_id, quota) in quota_map.items():

        user_name = user_map.get(int(user_id), None)

        if user_name and user_name.startswith('vsc4'):
            user = VscUser(user_name)
            logger.debug("Checking quota for user %s with ID %s" % (user_name, user_id))
            logger.debug("User %s quota: %s" % (user, quota))

            path = user._get_path(storage_name)

            # FIXME: We need some better way to address this
            # Right now, we replace the nfs mount prefix which the symlink points to
            # with the gpfs mount point. this is a workaround until we resolve the
            # symlink problem once we take new default scratch into production
            if gpfs.is_symlink(path):
                target = os.path.realpath(path)
                if target.startswith(login_mount_point):
                    new_path = target.replace(login_mount_point, gpfs_mount_point, 1)
                    logger.info("Found a symlinked path %s to the nfs mount point %s. Replaced with %s" %
                                (path, login_mount_point, gpfs_mount_point))
                else:
                    # the symlink target is outside the login mount point; new_path would be undefined below
                    logger.warning("Unable to store quota information for %s on %s; symlink cannot be resolved properly"
                                   % (user_name, path))
                    continue
            else:
                new_path = path

            path_stat = os.stat(new_path)
            filename = os.path.join(new_path, ".quota_user.json.gz")

            if dry_run:
                logger.info("Dry run: would update cache for %s at %s with %s" % (storage_name, path, "%s" % (quota,)))
                logger.info("Dry run: would chmod 640 %s" % (filename,))
                logger.info("Dry run: would chown %s to %s %s" % (filename, path_stat.st_uid, path_stat.st_gid))
            else:
                cache = FileCache(filename)
                cache.update(key="quota", data=quota, threshold=0)
                cache.update(key="storage_name", data=storage_name, threshold=0)
                cache.close()

                gpfs.ignorerealpathmismatch = True
                gpfs.chmod(0640, filename)
                gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)
                gpfs.ignorerealpathmismatch = False

            logger.info("Stored user %s quota for storage %s at %s" % (user_name, storage_name, filename))

            if quota.exceeds():
                exceeding_users.append((user_name, quota))

    return exceeding_users
Example #29
def read_cache(owner, showvo, running, idle, blocked, path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """
    cache = FileCache(path)
    res = cache.load('showq')[1][0]
    user_map = cache.load('showq')[1][1]
    # check for timeinfo
    if res['timeinfo'] < (time.time() - MAXIMAL_AGE):
        print "The data in the showq cache may be outdated. Please contact your admin to look into this."
    #    return (None, None)

    del res['timeinfo']

    logger.debug("Resulting cache data: %s" % (res))

    # Filter out data that is not needed
    if not showvo:
        for user in res.keys():
            if not user == owner:
                pass

    for user in res.keys():
        for host in res[user].keys():
            logger.debug("looking at host %s" % (host))
            states = res[user][host].keys()
            if not running:
                if 'Running' in states:
                    del res[user][host]['Running']
            if not idle:
                if 'Idle' in states:
                    del res[user][host]['Idle']
            if not blocked:
                for state in [
                        x for x in states if x not in ('Running', 'Idle')
                ]:
                    del res[user][host][state]

    return (res, user_map)
Example #30
def store_on_gpfs(user_name, path, key, information, gpfs, login_mount_point, gpfs_mount_point, filename,
                  dry_run=False):
    """
    Store the given information in a cache file that resides in a user's directory.

    @type user_name: string
    @type path: string, representing a directory
    @type key: string, name for the kind of information we are going to store in the cache
    @type information: a recursive dict structure
    @type gpfs: GpfsOperations instance
    @type login_mount_point: path representing the mount point of the storage location on the login nodes
    @type gpfs_mount_point: path representing the mount point of the storage location when GPFS mounted
    @type dry_run: boolean
    """

    if user_name and user_name.startswith('vsc4'):
        logger.debug("Storing %s information for user %s", key, user_name)
        logger.debug("information: %s", information)
        logger.debug("path for storing information would be %s", path)

        # FIXME: We need some better way to address this
        # Right now, we replace the nfs mount prefix which the symlink points to
        # with the gpfs mount point. this is a workaround until we resolve the
        # symlink problem once we take new default scratch into production
        if gpfs.is_symlink(path):
            target = os.path.realpath(path)
            logger.debug("path is a symlink, target is %s", target)
            logger.debug("login_mount_point is %s", login_mount_point)
            if target.startswith(login_mount_point):
                new_path = target.replace(login_mount_point, gpfs_mount_point, 1)
                logger.info("Found a symlinked path %s to the nfs mount point %s. Replaced with %s",
                            path, login_mount_point, gpfs_mount_point)
            else:
                logger.warning("Unable to store quota information for %s on %s; symlink cannot be resolved properly",
                               user_name, path)
                return  # new_path would be undefined below; nothing sensible to store
        else:
            new_path = path

        path_stat = os.stat(new_path)
        filename = os.path.join(new_path, filename)

        if dry_run:
            logger.info("Dry run: would update cache for at %s with %s", new_path, information)
            logger.info("Dry run: would chmod 640 %s", filename)
            logger.info("Dry run: would chown %s to %s %s", filename, path_stat.st_uid, path_stat.st_gid)
        else:
            cache = FileCache(filename, False)  # data need not be retained
            cache.update(key=key, data=information, threshold=0)
            cache.close()

            gpfs.ignorerealpathmismatch = True
            gpfs.chmod(0o640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)  # owner and group of the containing directory
            gpfs.ignorerealpathmismatch = False

        logger.info("Stored user %s %s information at %s", user_name, key, filename)
Example #31
    def test_value_error(self, mock_decode):
        "Test to see that a ValueError upon decoding gets caught correctly"
        tempdir = tempfile.mkdtemp()
        # create a tempfilename
        (handle, filename) = tempfile.mkstemp(dir=tempdir)
        f = os.fdopen(handle, 'wb')
        g = gzip.GzipFile(mode='wb', fileobj=f)
        g.write(b'blabla no json gzip stuffz')
        g.close()

        e = ValueError('unable to find valid JSON')
        mock_decode.side_effect = e

        fc = FileCache(filename)

        self.assertTrue(fc.shelf == {})
        shutil.rmtree(tempdir)
Example #32
def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """

    if timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp = timestamp.replace(tzinfo=Local)

    cache = FileCache(filename)
    cache.update(0, timestamp, 0)
    cache.close()
Example #33
def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """

    if isinstance(timestamp, datetime.datetime) and timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp_ = timestamp.replace(tzinfo=utc)
        (_, timestamp_) = convert_timestamp(timestamp)
    else:
        timestamp_ = timestamp

    cache = FileCache(filename)
    cache.update('timestamp', timestamp_, 0)
    cache.close()
Example #34
def process_fileset_quota(storage, gpfs, storage_name, filesystem, quota_map, opener, url, access_token, dry_run=False):
    """Store the quota information in the filesets.
    """

    filesets = gpfs.list_filesets()
    exceeding_filesets = []

    log_vo_quota_to_django(storage_name, quota_map, opener, url, access_token, dry_run)

    logger.info("filesets = %s" % (filesets))

    payload = []
    for (fileset, quota) in quota_map.items():
        fileset_name = filesets[filesystem][fileset]['filesetName']
        logger.debug("Fileset %s quota: %s" % (fileset_name, quota))

        path = filesets[filesystem][fileset]['path']
        filename = os.path.join(path, ".quota_fileset.json.gz")
        path_stat = os.stat(path)

        if dry_run:
            logger.info("Dry run: would update cache for %s at %s with %s" % (storage_name, path, "%s" % (quota,)))
            logger.info("Dry run: would chmod 640 %s" % (filename,))
            logger.info("Dry run: would chown %s to %s %s" % (filename, path_stat.st_uid, path_stat.st_gid))
        else:
            # TODO: This should somehow be some atomic operation.
            cache = FileCache(filename, False)
            cache.update(key="quota", data=quota, threshold=0)
            cache.update(key="storage_name", data=storage_name, threshold=0)
            cache.close()

            gpfs.chmod(0640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)

        logger.info("Stored fileset %s [%s] quota for storage %s at %s" % (fileset, fileset_name, storage, filename))

        if quota.exceeds():
            exceeding_filesets.append((fileset_name, quota))

    return exceeding_filesets
Example #35
    def _store_pickle_cluster_file(self, host, output, raw=True):
        """Store the result of the showq command in the relevant pickle file.

        @type output: string

        @param output: showq output information
        """

        dest = os.path.join(self._cache_pickle_directory(), self._cache_pickle_name(host))

        if not self.dry_run:
            if raw:
                f = open(dest, 'w')
                cPickle.dump(output, f)
                f.close()
            else:
                cache = FileCache(dest)
                cache.update(self.cache_key, output, 0)  # no retention of old data
                cache.close()
        else:
            self.logger.info("Dry run: skipping actually storing pickle files for cluster data")
Example #36
    def write_donefile(self, values):
        """ Write a cachefile with some stats about the run when done """

        cache_file = FileCache(self.done_file)
        cache_file.update('stats', values, 0)
        cache_file.close()
Example #37
def process_hold(clusters, dry_run=False):
    """Process a filtered queueinfo dict"""
    releasejob_cache = FileCache(RELEASEJOB_CACHE_FILE)

    # get the showq data
    for data in clusters.values():
        data['path'] = data['spath']  # showq path
    showq = Showq(clusters, cache_pickle=True)
    (queue_information, _, _) = showq.get_moab_command_information()

    # release the jobs, prepare the command
    m = MoabCommand(cache_pickle=False, dry_run=dry_run)
    for data in clusters.values():
        data['path'] = data['mpath']  # mjobctl path
    m.clusters = clusters

    # read the previous data
    ts_data = releasejob_cache.load('queue_information')
    if ts_data is None:
        old_queue_information = {}
    else:
        (_, old_queue_information) = ts_data

    stats = {
        'peruser': 0,
        'total': 0,
        'release': 0,
    }

    release_jobids = []

    for user, clusterdata in queue_information.items():
        oldclusterdata = old_queue_information.setdefault(user, {})
        totaluser = 0
        for cluster, data in clusterdata.items():
            olddata = oldclusterdata.setdefault(cluster, {})
            # DRMJID is supposed to be unique
            # get all oldjobids in one dict
            oldjobs = dict([(j['DRMJID'], j['_release'])
                            for jt in olddata.values() for j in jt])
            for jobtype, jobs in data.items():
                removeids = []
                for idx, job in enumerate(jobs):
                    jid = job['DRMJID']

                    if jobtype in RELEASEJOB_SUPPORTED_HOLDTYPES:
                        totaluser += 1
                        release = max(oldjobs.get(jid, 0), 0) + 1
                        job['_release'] = release
                        stats['release'] = max(stats['release'], release)
                        release_jobids.append(jid)
                        # release the job
                        cmd = [m.clusters[cluster]['path'], '-u', jid]
                        logger.info(
                            "Releasing job %s cluster %s for the %s-th time." %
                            (jid, cluster, release))
                        if dry_run:
                            logger.info("Dry run %s" % cmd)
                        else:
                            m._run_moab_command(cmd, cluster, [])
                    else:
                        # keep historical data, eg a previously released job could be idle now
                        # but keep the counter in case it gets held again
                        try:
                            release = oldjobs[jid]
                            job['_release'] = release
                        except KeyError:
                            # not previously in hold, remove it
                            removeids.append(idx)

                # remove the jobs (in reverse order)
                for remove_idx in removeids[::-1]:
                    jobs.pop(remove_idx)

                # cleanup
                if len(jobs) == 0:
                    data.pop(jobtype)
            # cleanup
            if len(data) == 0:
                clusterdata.pop(cluster)
        # cleanup
        if len(clusterdata) == 0:
            queue_information.pop(user)

        # update stats
        stats['peruser'] = max(stats['peruser'], totaluser)
        stats['total'] += totaluser

    logger.info(
        "Release statistics: total jobs in hold %(total)s; max in hold per user %(peruser)s; max releases per job %(release)s"
        % stats)

    # update and close
    releasejob_cache.update('queue_information', queue_information, 0)
    releasejob_cache.close()

    return release_jobids, stats
Example #38
def process_hold(clusters, dry_run=False):
    """Process a filtered queueinfo dict"""
    releasejob_cache = FileCache(RELEASEJOB_CACHE_FILE)

    # get the showq data
    for data in clusters.values():
        data['path'] = data['spath']  # showq path
    showq = Showq(clusters, cache_pickle=True)
    (queue_information, _, _) = showq.get_moab_command_information()

    # release the jobs, prepare the command
    m = MoabCommand(cache_pickle=False, dry_run=dry_run)
    for data in clusters.values():
        data['path'] = data['mpath']  # mjobctl path
    m.clusters = clusters

    # read the previous data
    ts_data = releasejob_cache.load('queue_information')
    if ts_data is None:
        old_queue_information = {}
    else:
        (_, old_queue_information) = ts_data

    stats = {
        'peruser': 0,
        'total': 0,
        'release': 0,
    }

    release_jobids = []

    for user, clusterdata in queue_information.items():
        oldclusterdata = old_queue_information.setdefault(user, {})
        totaluser = 0
        for cluster, data in clusterdata.items():
            olddata = oldclusterdata.setdefault(cluster, {})
            # DRMJID is supposed to be unique
            # get all oldjobids in one dict
            oldjobs = dict([(j['DRMJID'], j['_release']) for jt in olddata.values() for j in jt])
            for jobtype, jobs in data.items():
                removeids = []
                for idx, job in enumerate(jobs):
                    jid = job['DRMJID']

                    if jobtype in RELEASEJOB_SUPPORTED_HOLDTYPES:
                        totaluser += 1
                        release = max(oldjobs.get(jid, 0), 0) + 1
                        job['_release'] = release
                        stats['release'] = max(stats['release'], release)
                        release_jobids.append(jid)
                        # release the job
                        cmd = [m.clusters[cluster]['path'], '-u', jid]
                        logger.info("Releasing job %s cluster %s for the %s-th time." % (jid, cluster, release))
                        if dry_run:
                            logger.info("Dry run %s" % cmd)
                        else:
                            m._run_moab_command(cmd, cluster, [])
                    else:
                        # keep historical data, eg a previously released job could be idle now
                        # but keep the counter in case it gets held again
                        try:
                            release = oldjobs[jid]
                            job['_release'] = release
                        except KeyError:
                            # not previously in hold, remove it
                            removeids.append(idx)

                # remove the jobs (in reverse order)
                for remove_idx in removeids[::-1]:
                    jobs.pop(remove_idx)

                # cleanup
                if len(jobs) == 0:
                    data.pop(jobtype)
            # cleanup
            if len(data) == 0:
                clusterdata.pop(cluster)
        # cleanup
        if len(clusterdata) == 0:
            queue_information.pop(user)

        # update stats
        stats['peruser'] = max(stats['peruser'], totaluser)
        stats['total'] += totaluser

    logger.info("Release statistics: total jobs in hold %(total)s; max in hold per user %(peruser)s; max releases per job %(release)s" % stats)

    # update and close
    releasejob_cache.update('queue_information', queue_information, 0)
    releasejob_cache.close()

    return release_jobids, stats
Example #39
    def test_save_and_load(self, data, threshold):
        """Check if the loaded data is the same as the saved data."""

        # create a tempfilename
        (handle, filename) = tempfile.mkstemp()
        os.unlink(filename)
        cache = FileCache(filename)
        for (key, value) in data.items():
            cache.update(key, value, threshold)
        cache.close()

        now = time.time()
        new_cache = FileCache(filename)
        for key in data.keys():
            info = new_cache.load(key)
            self.assertTrue(info is not None)
            (ts, value) = info
            self.assertTrue(value == data[key])
            self.assertTrue(ts <= now)
        new_cache.close()

        os.unlink(filename)
Example #40
    def test_save_and_load(self):
        """Check if the loaded data is the same as the saved data."""
        # test with random data
        data, threshold = get_rand_data()
        tempdir = tempfile.mkdtemp()
        # create a tempfilename
        (handle, filename) = tempfile.mkstemp(dir=tempdir)
        os.close(handle)
        shutil.rmtree(tempdir)
        cache = FileCache(filename)
        for (key, value) in data.items():
            cache.update(key, value, threshold)
        cache.close()

        now = time.time()
        new_cache = FileCache(filename)
        for key in data.keys():
            info = new_cache.load(key)
            self.assertTrue(info is not None)
            (ts, value) = info
            self.assertTrue(value == data[key])
            self.assertTrue(ts <= now)
        new_cache.close()

        shutil.rmtree(tempdir)
Example #41
    def write_donefile(self, values):
        """ Write a cachefile with some stats about the run when done """

        cache_file = FileCache(self.done_file)
        cache_file.update('stats', values, 0)
        cache_file.close()
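Across all of these examples the FileCache usage follows the same round trip: construct it on a path, update() one or more keys (a threshold of 0 forces the write), close() to persist, and load() a key back later as a (timestamp, data) tuple. A condensed sketch of that pattern, assuming the import path used by these projects; the file and payload are placeholders:

from vsc.utils.cache import FileCache  # assumption: the FileCache these snippets use

path = "/tmp/.example_cache.json.gz"   # placeholder

cache = FileCache(path)
cache.update('stats', {'completed': 50, 'failed': 5, 'unfinished': 0}, 0)  # threshold 0: always overwrite
cache.close()                                                              # persists the gzipped JSON shelf

# a later process reads it back; load() returns (timestamp, data) for a known key
(timestamp, stats) = FileCache(path).load('stats')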