def notify_exceeding_items(gpfs, storage, filesystem, exceeding_items, target, dry_run=False):
    """Send out notification to the fileset owners.

    - if the fileset belongs to a VO: the VO moderator
    - if the fileset belongs to a project: the project moderator
    - if the fileset belongs to a user: the user

    The information is cached. The mail is sent in the following cases:
        - the quota excess is new
        - the quota excess was already cached more than 7 days ago. In that case, the cache entry is
          refreshed to avoid sending outdated mails repeatedly.
    """
    cache_path = os.path.join(gpfs.list_filesystems()[filesystem]['defaultMountPoint'],
                              ".quota_%s_cache.json.gz" % (target))
    cache = FileCache(cache_path, True)  # we retain the old data

    logger.info("Processing %d exceeding items" % (len(exceeding_items)))

    for (item, quota) in exceeding_items:
        updated = cache.update(item, quota, QUOTA_NOTIFICATION_CACHE_THRESHOLD)
        logger.info("Storage %s: cache entry for %s was updated: %s" % (storage, item, updated))
        if updated:
            notify(storage, item, quota, dry_run)

    if not dry_run:
        cache.close()
    else:
        logger.info("Dry run: not saving the updated cache")

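# Note on the pattern above: FileCache.update() only reports an entry as updated when the key is
# new or when the cached entry is older than the given threshold, which is what drives the
# "notify again after 7 days" behaviour. A minimal, self-contained sketch of that gate
# (hypothetical helper and threshold; FileCache import path assumed to be vsc.utils.cache):
from vsc.utils.cache import FileCache

def should_notify(cache_path, item, payload, threshold_seconds=7 * 24 * 3600):
    """Return a truthy value when item is new or its cached entry is older than the threshold."""
    cache = FileCache(cache_path, True)  # retain the existing entries
    updated = cache.update(item, payload, threshold_seconds)
    cache.close()  # persist the refreshed entry so the next run sees the new timestamp
    return updated
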
def print_vo_quota(opts, storage, vos, now):
    """
    Print the quota for the VO fileset.
    """
    print "\nVO quota:"
    for storage_name in [s for s in opts.options.storage if s != 'VSC_HOME']:  # No VOs on VSC_HOME atm
        mount_point = storage[storage_name].login_mount_point
        path_template = storage.path_templates[storage_name]['vo']
        path = os.path.join(mount_point, path_template[0], path_template[1](vos[0]), ".quota_fileset.json.gz")
        cache = FileCache(path, True)
        try:
            (timestamp, quota) = cache.load('quota')
        except TypeError:
            logger.debug("Cannot load data from %s" % (path,))
            print "%s: WARNING: No VO quota information found" % (storage_name,)
            continue

        if now - timestamp > opts.options.threshold:
            print "%s: WARNING: no recent VO quota information (age of data is %d minutes)" % \
                (storage_name, (now - timestamp) / 60)
        else:
            for (fileset, qi) in quota.quota_map.items():
                pp = quota_pretty_print(storage_name, fileset, qi, opts.options.fileset_prefixes)
                if pp:
                    print pp

def print_user_quota(opts, storage, user_name, now):
    """
    Print the quota for the user, i.e., USR quota in all filesets the user has access to.
    """
    print "User quota:"
    for storage_name in opts.options.storage:
        mount_point = storage[storage_name].login_mount_point
        path_template = storage.path_templates[storage_name]['user']
        path = os.path.join(mount_point, path_template[0], path_template[1](user_name), ".quota_user.json.gz")
        cache = FileCache(path, True)
        try:
            (timestamp, quota) = cache.load('quota')
        except TypeError:
            logger.debug("Cannot load data from %s" % (path,))
            print "%s: WARNING: No quota information found" % (storage_name,)
            continue

        if now - timestamp > opts.options.threshold:
            print "%s: WARNING: no recent quota information (age of data is %d minutes)" % \
                (storage_name, (now - timestamp) / 60)
        else:
            for (fileset, qi) in quota.quota_map.items():
                pp = quota_pretty_print(storage_name, fileset, qi, opts.options.fileset_prefixes)
                if pp:
                    print pp

def report_and_exit(self):
    """Unzips the cache file and reads the JSON data back in, prints the data and exits accordingly.

    If the cache data is too old (now - cache timestamp > self.threshold), a critical exit is produced.
    """
    try:
        nagios_cache = FileCache(self.filename, True)
    except:
        self.log.critical("Error opening file %s for reading" % (self.filename))
        unknown_exit("%s nagios gzipped JSON file unavailable (%s)" % (self.header, self.filename))

    (timestamp, ((nagios_exit_code, nagios_exit_string), nagios_message)) = nagios_cache.load('nagios')
    nagios_cache.close()

    if self.threshold <= 0 or time.time() - timestamp < self.threshold:
        self.log.info("Nagios check cache file %s contents delivered: %s" % (self.filename, nagios_message))
        print "%s %s" % (nagios_exit_string, nagios_message)
        sys.exit(nagios_exit_code)
    else:
        unknown_exit("%s gzipped JSON file too old (timestamp = %s)" % (self.header, time.ctime(timestamp)))

def main():
    options = {
        'storage': ('the VSC filesystems that are checked by this script', None, 'extend', []),
        'threshold': ('the allowed time difference between the cached quota and the time of running',
                      None, 'store', DEFAULT_ALLOWED_TIME_THRESHOLD),
    }
    opts = simple_option(options, config_files='/etc/quota_information.conf')

    storage = VscStorage()
    user_name = getpwuid(os.getuid())[0]
    now = time.time()

    for storage_name in opts.options.storage:
        mount_point = storage[storage_name].login_mount_point
        path_template = storage.path_templates[storage_name]['user']
        path = os.path.join(mount_point, path_template[0], path_template[1](user_name), ".quota_user.json.gz")

        cache = FileCache(path)
        (timestamp, quota) = cache.load('quota')

        if now - timestamp > opts.options.threshold:
            print "%s: WARNING: no recent quota information (age of data is %d minutes)" % \
                (storage_name, (now - timestamp) / 60)
        else:
            for (fileset, qi) in quota.quota_map.items():
                print "%s: quota in fileset %s: %s" % (storage_name, fileset, qi)


if __name__ == '__main__':
    main()

def cache(self, nagios_exit, nagios_message):
    """Store the result in the cache file with a timestamp.

    @type nagios_exit: one of NAGIOS_EXIT_OK, NAGIOS_EXIT_WARNING, NAGIOS_EXIT_CRITICAL or NAGIOS_EXIT_UNKNOWN
    @type nagios_message: string

    @param nagios_exit: a valid nagios exit code.
    @param nagios_message: the message to print out when the actual check runs.
    """
    try:
        nagios_cache = FileCache(self.filename)
        nagios_cache.update(0, (nagios_exit, nagios_message), 0)  # always update
        nagios_cache.close()
        self.log.info("Wrote nagios check cache file %s at about %s" % (self.filename, time.ctime(time.time())))
    except:
        # raising an error is ok, since we usually do this as the very last thing in the script
        self.log.raiseException("Cannot save to the nagios pickled file (%s)" % (self.filename))

    try:
        p = pwd.getpwnam(self.nagios_username)
        os.chmod(self.filename, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
        os.chown(self.filename, p.pw_uid, p.pw_gid)
    except:
        self.log.raiseException("Cannot chown the nagios check file %s to the nagios user" % (self.filename))

    return True

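# The cache()/report_and_exit() pair above implements a producer/consumer split: a long-running
# cron job stores its verdict, and the lightweight nagios plugin later replays it. A hedged usage
# sketch of that flow (the reporter object and the import location of the exit constants are
# assumptions based on the snippets above, not a verified API):
from vsc.utils.nagios import NAGIOS_EXIT_OK, NAGIOS_EXIT_CRITICAL

def store_check_result(reporter, ok, message):
    # producer side (cron job): persist the exit code and message with a timestamp
    reporter.cache(NAGIOS_EXIT_OK if ok else NAGIOS_EXIT_CRITICAL, message)

def replay_check_result(reporter):
    # consumer side (nagios plugin): print the cached result and exit with its code,
    # or exit UNKNOWN when the cache file is missing or older than reporter.threshold
    reporter.report_and_exit()
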
def store_on_gpfs(user_name, path, key, information, gpfs, login_mount_point, gpfs_mount_point,
                  filename, dry_run=False):
    """
    Store the given information in a cache file that resides in a user's directory.

    @type user_name: string
    @type path: string, representing a directory
    @type key: string, name for the kind of information we are going to store in the cache
    @type information: a recursive dict structure
    @type gpfs: GpfsOperations instance
    @type login_mount_point: path representing the mount point of the storage location on the login nodes
    @type gpfs_mount_point: path representing the mount point of the storage location when GPFS mounted
    @type dry_run: boolean
    """
    if user_name and user_name.startswith('vsc4'):
        logger.debug("Storing %s information for user %s" % (key, user_name,))
        logger.debug("information: %s" % (information,))
        logger.debug("path for storing information would be %s" % (path,))

        # FIXME: We need some better way to address this
        # Right now, we replace the nfs mount prefix which the symlink points to
        # with the gpfs mount point. this is a workaround until we resolve the
        # symlink problem once we take new default scratch into production
        if gpfs.is_symlink(path):
            target = os.path.realpath(path)
            logger.debug("path is a symlink, target is %s" % (target,))
            logger.debug("login_mount_point is %s" % (login_mount_point,))
            if target.startswith(login_mount_point):
                new_path = target.replace(login_mount_point, gpfs_mount_point, 1)
                logger.info("Found a symlinked path %s to the nfs mount point %s. Replaced with %s" %
                            (path, login_mount_point, gpfs_mount_point))
            else:
                logger.warning("Unable to store quota information for %s on %s; symlink cannot be resolved properly" %
                               (user_name, path))
                return  # bail out: new_path cannot be determined for an unresolved symlink
        else:
            new_path = path

        path_stat = os.stat(new_path)
        filename = os.path.join(new_path, filename)

        if dry_run:
            logger.info("Dry run: would update cache at %s with %s" % (new_path, "%s" % (information,)))
            logger.info("Dry run: would chmod 640 %s" % (filename,))
            logger.info("Dry run: would chown %s to %s %s" % (filename, path_stat.st_uid, path_stat.st_gid))
        else:
            cache = FileCache(filename, False)  # data need not be retained
            cache.update(key=key, data=information, threshold=0)
            cache.close()

            gpfs.ignorerealpathmismatch = True
            gpfs.chmod(0o640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_uid, filename)
            gpfs.ignorerealpathmismatch = False

        logger.info("Stored user %s %s information at %s" % (user_name, key, filename))

def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: string representing a timestamp in the proper LDAP time format
    """
    cache = FileCache(filename)
    (_, timestamp) = cache.load('timestamp')
    return timestamp

def test_save_and_load(self):
    """Check if the loaded data is the same as the saved data."""
    # test with random data
    data, threshold = get_rand_data()

    # create a tempfilename
    (handle, filename) = tempfile.mkstemp()
    os.unlink(filename)
    os.close(handle)

    cache = FileCache(filename)
    for (key, value) in data.items():
        cache.update(key, value, threshold)
    cache.close()

    now = time.time()
    new_cache = FileCache(filename)
    for key in data.keys():
        # load from the re-opened cache so the on-disk data is what gets verified
        info = new_cache.load(key)
        self.assertTrue(info is not None)
        (ts, value) = info
        self.assertTrue(value == data[key])
        self.assertTrue(ts <= now)
    new_cache.close()

    os.unlink(filename)

def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """
    if timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp = timestamp.replace(tzinfo=Local)
    cache = FileCache(filename)
    cache.update(0, timestamp, 0)
    cache.close()

def test_write_donefile(self):
    """ Test the writing of the values to a cache file when done """
    donefile = "/tmp/done"
    values = {
        'completed': 50,
        'failed': 5,
        'unfinished': 0,
    }
    zkclient = RsyncSource('dummy', session='new', netcat=True, rsyncpath='/path/dummy',
                           rsyncdepth=2, done_file=donefile)
    zkclient.write_donefile(values)

    cache_file = FileCache(donefile)
    (timestamp, stats) = cache_file.load('stats')
    self.assertEqual(values, stats)

def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: string representing a timestamp in the proper LDAP time format
    """
    cache = FileCache(filename)
    (_, timestamp) = cache.load(0)

    if timestamp is not None and timestamp.tzinfo is None:
        # add local timezoneinfo
        timestamp = timestamp.replace(tzinfo=Local)

    return timestamp

def read_timestamp(filename):
    """Read the stored timestamp value from a pickled file.

    @returns: a timestamp in whatever format it was stored in (string LDAP timestamp, unix epoch, ...)
    """
    cache = FileCache(filename)
    try:
        (_, timestamp) = cache.load('timestamp')
    except TypeError:
        logging.warning('could not load timestamp from cache file %s', filename)
        timestamp = None

    return timestamp

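# The timestamp helpers above store a single value under one fixed key and treat a missing key as
# "no timestamp yet" (load() returning None, hence the TypeError guard). A small round-trip sketch
# of that convention (hypothetical filename; an ISO string is stored to stay JSON-serialisable;
# FileCache import path assumed to be vsc.utils.cache):
import datetime
from vsc.utils.cache import FileCache

def timestamp_roundtrip(filename='/tmp/example_timestamp.json.gz'):
    cache = FileCache(filename)
    cache.update('timestamp', datetime.datetime.now().isoformat(), 0)  # threshold 0: always overwrite
    cache.close()

    entry = FileCache(filename).load('timestamp')  # (cache_time, value) or None when absent
    return None if entry is None else entry[1]
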
def read_cache(path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """
    try:
        cache = FileCache(path)
    except Exception:
        print "Failed to load checkjob information from %s" % (path,)
        return None  # without a cache there is nothing to report

    res = cache.load('checkjob')

    if res[0] < (time.time() - MAXIMAL_AGE):
        print "The data in the checkjob cache may be outdated. Please contact your admin to look into this."

    return res[1]  # CheckjobInfo

def test_contents(self, data, threshold):
    """Check that the contents of the cache is what is expected prior to closing it."""
    # create a tempfilename
    (handle, filename) = tempfile.mkstemp(dir='/tmp')
    os.unlink(filename)

    cache = FileCache(filename)
    for (key, value) in data.items():
        cache.update(key, value, threshold)

    now = time.time()
    for key in data.keys():
        info = cache.load(key)
        self.assertFalse(info is None)
        (ts, value) = info
        self.assertTrue(value == data[key])
        self.assertTrue(ts <= now)

def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """
    if isinstance(timestamp, datetime.datetime) and timestamp.tzinfo is None:
        # add local timezone info before converting, so the stored value is unambiguous
        timestamp = timestamp.replace(tzinfo=Local)
        (_, timestamp_) = convert_timestamp(timestamp)
    else:
        timestamp_ = timestamp

    cache = FileCache(filename)
    cache.update('timestamp', timestamp_, 0)
    cache.close()

def process_user_quota(gpfs, storage, filesystem, quota_map, user_map):
    """Store the information in the user directories.
    """
    exceeding_users = []

    for (user_id, quota) in quota_map.items():
        user_name = user_map.get(int(user_id), None)
        logger.debug("Checking quota for user %s with ID %s" % (user_name, user_id))

        if user_name and user_name.startswith('vsc'):
            user = VscUser(user_name)
            logger.debug("User %s quota: %s" % (user, quota))

            path = user._get_path(storage)
            path_stat = os.stat(path)
            filename = os.path.join(path, ".quota_user.json.gz")

            cache = FileCache(filename)
            cache.update(key="quota", data=quota, threshold=0)
            cache.update(key="storage", data=storage, threshold=0)
            cache.close()

            gpfs.ignorerealpathmismatch = True
            gpfs.chmod(0640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_uid, filename)
            gpfs.ignorerealpathmismatch = False

            logger.info("Stored user %s quota for storage %s at %s" % (user_name, storage, filename))

            if quota.exceeds():
                exceeding_users.append((user, quota))

    return exceeding_users

def process_fileset_quota(gpfs, storage, filesystem, quota_map):
    """Store the quota information in the filesets.
    """
    filesets = gpfs.list_filesets()
    exceeding_filesets = []

    logger.info("filesets = %s" % (filesets))

    for (fileset, quota) in quota_map.items():
        logger.debug("Fileset %s quota: %s" % (filesets[filesystem][fileset]['filesetName'], quota))

        path = filesets[filesystem][fileset]['path']
        filename = os.path.join(path, ".quota_fileset.json.gz")
        path_stat = os.stat(path)

        # TODO: This should somehow be some atomic operation.
        cache = FileCache(filename)
        cache.update(key="quota", data=quota, threshold=0)
        cache.update(key="storage", data=storage, threshold=0)
        cache.close()

        gpfs.chmod(0640, filename)
        gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)

        logger.info("Stored fileset %s quota for storage %s at %s" % (fileset, storage, filename))

        if quota.exceeds():
            exceeding_filesets.append((fileset, quota))

    return exceeding_filesets

def _load_pickle_cluster_file(self, host, raw=True):
    """Load the data from the pickled files.

    @type host: string

    @param host: cluster for which we load data

    @returns: representation of the showq output.
    """
    source = os.path.join(self._cache_pickle_directory(), self._cache_pickle_name(host))

    if raw:
        f = open(source, 'r')
        output = cPickle.load(f)
        f.close()
        return output
    else:
        cache = FileCache(source)
        return cache.load(self.cache_key)

def read_cache(owner, showvo, running, idle, blocked, path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """
    try:
        cache = FileCache(path)
    except:
        print "Failed to load showq information from %s" % (path,)
        return (None, None)  # without a cache there is nothing to show

    res = cache.load('showq')[1][0]
    user_map = cache.load('showq')[1][1]

    ## check for timeinfo
    if res['timeinfo'] < (time.time() - MAXIMAL_AGE):
        print "The data in the showq cache may be outdated. Please contact your admin to look into this."
        # return (None, None)

    del res['timeinfo']

    logger.debug("Resulting cache data: %s" % (res))

    # Filter out data that is not needed
    if not showvo:
        for user in res.keys():
            if not user == owner:
                #del res[user]
                pass

    for user in res.keys():
        for host in res[user].keys():
            logger.debug("looking at host %s" % (host))
            states = res[user][host].keys()
            if not running:
                if 'Running' in states:
                    del res[user][host]['Running']
            if not idle:
                if 'Idle' in states:
                    del res[user][host]['Idle']
            if not blocked:
                for state in [x for x in states if not x in ('Running', 'Idle')]:
                    del res[user][host][state]

    return (res, user_map)

def test_corrupt_gz_cache(self):
    """Test to see if we can handle a corrupt cache file"""
    tempdir = tempfile.mkdtemp()
    # create a tempfilename
    (handle, filename) = tempfile.mkstemp(dir=tempdir)
    f = os.fdopen(handle, 'w')
    f.write('blabla;not gz')
    f.close()

    FileCache(filename)

    shutil.rmtree(tempdir)

def report_and_exit(self):
    """Unpickles the cache file, prints the data and exits accordingly.

    If the cache data is too old (now - cache timestamp > self.threshold), a critical exit is produced.
    """
    try:
        nagios_cache = FileCache(self.filename, True)
    except:
        self.log.critical("Error opening file %s for reading" % (self.filename))
        unknown_exit("%s nagios pickled file unavailable (%s)" % (self.header, self.filename))

    (timestamp, ((nagios_exit_code, nagios_exit_string), nagios_message)) = nagios_cache.load(0)
    nagios_cache.close()

    if self.threshold < 0 or time.time() - timestamp < self.threshold:
        self.log.info("Nagios check cache file %s contents delivered: %s" % (self.filename, nagios_message))
        print "%s %s" % (nagios_exit_string, nagios_message)
        sys.exit(nagios_exit_code)
    else:
        unknown_exit("%s pickled file too old (timestamp = %s)" % (self.header, time.ctime(timestamp)))

def cache(self, nagios_exit, nagios_message):
    """Store the result in the cache file with a timestamp.

    @type nagios_exit: one of NAGIOS_EXIT_OK, NAGIOS_EXIT_WARNING, NAGIOS_EXIT_CRITICAL or NAGIOS_EXIT_UNKNOWN
    @type nagios_message: string

    @param nagios_exit: a valid nagios exit code.
    @param nagios_message: the message to print out when the actual check runs.
    """
    try:
        nagios_cache = FileCache(self.filename)
        nagios_cache.update('nagios', (nagios_exit, nagios_message), 0)  # always update
        nagios_cache.close()
        self.log.info("Wrote nagios check cache file %s at about %s" % (self.filename, time.ctime(time.time())))
    except (IOError, OSError):
        # raising an error is ok, since we usually do this as the very last thing in the script
        self.log.raiseException("Cannot save to the nagios gzipped JSON file (%s)" % (self.filename))

    try:
        p = pwd.getpwnam(self.nagios_username)
        if self.world_readable:
            os.chmod(self.filename, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)
        else:
            os.chmod(self.filename, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)

        # only change owner/group when run as root
        if os.geteuid() == 0:
            os.chown(self.filename, p.pw_uid, p.pw_gid)
        else:
            self.log.warn("Not running as root: Cannot chown the nagios check file %s to %s" %
                          (self.filename, self.nagios_username))
    except OSError:
        self.log.raiseException("Cannot chown the nagios check file %s to the nagios user" % (self.filename))

    return True

def process_user_quota(storage, gpfs, storage_name, filesystem, quota_map, user_map, dry_run=False):
    """Store the information in the user directories.
    """
    exceeding_users = []
    login_mount_point = storage[storage_name].login_mount_point
    gpfs_mount_point = storage[storage_name].gpfs_mount_point

    for (user_id, quota) in quota_map.items():
        user_name = user_map.get(int(user_id), None)

        if user_name and user_name.startswith('vsc4'):
            user = VscUser(user_name)
            logger.debug("Checking quota for user %s with ID %s" % (user_name, user_id))
            logger.debug("User %s quota: %s" % (user, quota))

            path = user._get_path(storage_name)

            # FIXME: We need some better way to address this
            # Right now, we replace the nfs mount prefix which the symlink points to
            # with the gpfs mount point. this is a workaround until we resolve the
            # symlink problem once we take new default scratch into production
            if gpfs.is_symlink(path):
                target = os.path.realpath(path)
                if target.startswith(login_mount_point):
                    new_path = target.replace(login_mount_point, gpfs_mount_point, 1)
                    logger.info("Found a symlinked path %s to the nfs mount point %s. Replaced with %s" %
                                (path, login_mount_point, gpfs_mount_point))
            else:
                new_path = path

            path_stat = os.stat(new_path)
            filename = os.path.join(new_path, ".quota_user.json.gz")

            if dry_run:
                logger.info("Dry run: would update cache for %s at %s with %s" %
                            (storage_name, path, "%s" % (quota,)))
                logger.info("Dry run: would chmod 640 %s" % (filename,))
                logger.info("Dry run: would chown %s to %s %s" % (filename, path_stat.st_uid, path_stat.st_gid))
            else:
                cache = FileCache(filename)
                cache.update(key="quota", data=quota, threshold=0)
                cache.update(key="storage_name", data=storage_name, threshold=0)
                cache.close()

                gpfs.ignorerealpathmismatch = True
                gpfs.chmod(0640, filename)
                gpfs.chown(path_stat.st_uid, path_stat.st_uid, filename)
                gpfs.ignorerealpathmismatch = False

            logger.info("Stored user %s quota for storage %s at %s" % (user_name, storage_name, filename))

            if quota.exceeds():
                exceeding_users.append((user_name, quota))

    return exceeding_users

def read_cache(owner, showvo, running, idle, blocked, path):
    """
    Unpickle the file and fill in the resulting datastructure.
    """
    cache = FileCache(path)
    res = cache.load('showq')[1][0]
    user_map = cache.load('showq')[1][1]

    # check for timeinfo
    if res['timeinfo'] < (time.time() - MAXIMAL_AGE):
        print "The data in the showq cache may be outdated. Please contact your admin to look into this."
        # return (None, None)

    del res['timeinfo']

    logger.debug("Resulting cache data: %s" % (res))

    # Filter out data that is not needed
    if not showvo:
        for user in res.keys():
            if not user == owner:
                pass

    for user in res.keys():
        for host in res[user].keys():
            logger.debug("looking at host %s" % (host))
            states = res[user][host].keys()
            if not running:
                if 'Running' in states:
                    del res[user][host]['Running']
            if not idle:
                if 'Idle' in states:
                    del res[user][host]['Idle']
            if not blocked:
                for state in [x for x in states if x not in ('Running', 'Idle')]:
                    del res[user][host][state]

    return (res, user_map)

def store_on_gpfs(user_name, path, key, information, gpfs, login_mount_point, gpfs_mount_point,
                  filename, dry_run=False):
    """
    Store the given information in a cache file that resides in a user's directory.

    @type user_name: string
    @type path: string, representing a directory
    @type key: string, name for the kind of information we are going to store in the cache
    @type information: a recursive dict structure
    @type gpfs: GpfsOperations instance
    @type login_mount_point: path representing the mount point of the storage location on the login nodes
    @type gpfs_mount_point: path representing the mount point of the storage location when GPFS mounted
    @type dry_run: boolean
    """
    if user_name and user_name.startswith('vsc4'):
        logger.debug("Storing %s information for user %s", key, user_name)
        logger.debug("information: %s", information)
        logger.debug("path for storing information would be %s", path)

        # FIXME: We need some better way to address this
        # Right now, we replace the nfs mount prefix which the symlink points to
        # with the gpfs mount point. this is a workaround until we resolve the
        # symlink problem once we take new default scratch into production
        if gpfs.is_symlink(path):
            target = os.path.realpath(path)
            logger.debug("path is a symlink, target is %s", target)
            logger.debug("login_mount_point is %s", login_mount_point)
            if target.startswith(login_mount_point):
                new_path = target.replace(login_mount_point, gpfs_mount_point, 1)
                logger.info("Found a symlinked path %s to the nfs mount point %s. Replaced with %s",
                            path, login_mount_point, gpfs_mount_point)
            else:
                logger.warning("Unable to store quota information for %s on %s; symlink cannot be resolved properly",
                               user_name, path)
                return  # bail out: new_path cannot be determined for an unresolved symlink
        else:
            new_path = path

        path_stat = os.stat(new_path)
        filename = os.path.join(new_path, filename)

        if dry_run:
            logger.info("Dry run: would update cache at %s with %s", new_path, information)
            logger.info("Dry run: would chmod 640 %s", filename)
            logger.info("Dry run: would chown %s to %s %s", filename, path_stat.st_uid, path_stat.st_gid)
        else:
            cache = FileCache(filename, False)  # data need not be retained
            cache.update(key=key, data=information, threshold=0)
            cache.close()

            gpfs.ignorerealpathmismatch = True
            gpfs.chmod(0o640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_uid, filename)
            gpfs.ignorerealpathmismatch = False

        logger.info("Stored user %s %s information at %s", user_name, key, filename)

def test_value_error(self, mock_decode):
    """Test to see that a ValueError upon decoding gets caught correctly"""
    tempdir = tempfile.mkdtemp()
    # create a tempfilename
    (handle, filename) = tempfile.mkstemp(dir=tempdir)
    f = os.fdopen(handle, 'wb')
    g = gzip.GzipFile(mode='wb', fileobj=f)
    g.write(b'blabla no json gzip stuffz')
    g.close()

    e = ValueError('unable to find valid JSON')
    mock_decode.side_effect = e

    fc = FileCache(filename)
    self.assertTrue(fc.shelf == {})

    shutil.rmtree(tempdir)

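# The two corruption tests above (test_corrupt_gz_cache and test_value_error) exercise FileCache
# degrading gracefully: an unreadable or non-JSON file appears to result in an empty shelf rather
# than an exception, so load() simply returns None for any key. A hedged defensive-read helper
# built on that observation (hypothetical helper; not part of the FileCache API):
from vsc.utils.cache import FileCache

def load_or_default(filename, key, default=None):
    """Return the cached value for key, or default when the cache is missing, corrupt or empty."""
    entry = FileCache(filename).load(key)  # (timestamp, value) or None
    return default if entry is None else entry[1]
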
def write_timestamp(filename, timestamp):
    """Write the given timestamp to a pickled file.

    @type timestamp: datetime.datetime timestamp
    """
    if isinstance(timestamp, datetime.datetime) and timestamp.tzinfo is None:
        # add UTC timezone info before converting, so the stored value is unambiguous
        timestamp = timestamp.replace(tzinfo=utc)
        (_, timestamp_) = convert_timestamp(timestamp)
    else:
        timestamp_ = timestamp

    cache = FileCache(filename)
    cache.update('timestamp', timestamp_, 0)
    cache.close()

def process_fileset_quota(storage, gpfs, storage_name, filesystem, quota_map, opener, url, access_token,
                          dry_run=False):
    """Store the quota information in the filesets.
    """
    filesets = gpfs.list_filesets()
    exceeding_filesets = []

    log_vo_quota_to_django(storage_name, quota_map, opener, url, access_token, dry_run)

    logger.info("filesets = %s" % (filesets))

    payload = []
    for (fileset, quota) in quota_map.items():
        fileset_name = filesets[filesystem][fileset]['filesetName']
        logger.debug("Fileset %s quota: %s" % (fileset_name, quota))

        path = filesets[filesystem][fileset]['path']
        filename = os.path.join(path, ".quota_fileset.json.gz")
        path_stat = os.stat(path)

        if dry_run:
            logger.info("Dry run: would update cache for %s at %s with %s" %
                        (storage_name, path, "%s" % (quota,)))
            logger.info("Dry run: would chmod 640 %s" % (filename,))
            logger.info("Dry run: would chown %s to %s %s" % (filename, path_stat.st_uid, path_stat.st_gid))
        else:
            # TODO: This should somehow be some atomic operation.
            cache = FileCache(filename, False)
            cache.update(key="quota", data=quota, threshold=0)
            cache.update(key="storage_name", data=storage_name, threshold=0)
            cache.close()

            gpfs.chmod(0640, filename)
            gpfs.chown(path_stat.st_uid, path_stat.st_gid, filename)

        logger.info("Stored fileset %s [%s] quota for storage %s at %s" %
                    (fileset, fileset_name, storage_name, filename))

        if quota.exceeds():
            exceeding_filesets.append((fileset_name, quota))

    return exceeding_filesets

def _store_pickle_cluster_file(self, host, output, raw=True):
    """Store the result of the showq command in the relevant pickle file.

    @type output: string

    @param output: showq output information
    """
    dest = os.path.join(self._cache_pickle_directory(), self._cache_pickle_name(host))

    if not self.dry_run:
        if raw:
            f = open(dest, 'w')
            cPickle.dump(output, f)
            f.close()
        else:
            cache = FileCache(dest)
            cache.update(self.cache_key, output, 0)  # no retention of old data
            cache.close()
    else:
        self.logger.info("Dry run: skipping actually storing pickle files for cluster data")

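# In non-raw mode, _store_pickle_cluster_file() and _load_pickle_cluster_file() above form a plain
# FileCache pair: store under self.cache_key with threshold 0 (always overwrite) and read back a
# (timestamp, data) tuple. A standalone sketch of that mode (hypothetical path and key; FileCache
# import path assumed to be vsc.utils.cache):
from vsc.utils.cache import FileCache

def store_and_load_cluster_output(dest, output, cache_key='showq'):
    cache = FileCache(dest)
    cache.update(cache_key, output, 0)  # no retention of old data
    cache.close()

    return FileCache(dest).load(cache_key)  # (timestamp, output)
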
def write_donefile(self, values):
    """ Write a cachefile with some stats about the run when done """
    cache_file = FileCache(self.done_file)
    cache_file.update('stats', values, 0)
    cache_file.close()

def process_hold(clusters, dry_run=False):
    """Process a filtered queueinfo dict"""
    releasejob_cache = FileCache(RELEASEJOB_CACHE_FILE)

    # get the showq data
    for data in clusters.values():
        data['path'] = data['spath']  # showq path
    showq = Showq(clusters, cache_pickle=True)
    (queue_information, _, _) = showq.get_moab_command_information()

    # release the jobs, prepare the command
    m = MoabCommand(cache_pickle=False, dry_run=dry_run)
    for data in clusters.values():
        data['path'] = data['mpath']  # mjobctl path
    m.clusters = clusters

    # read the previous data
    ts_data = releasejob_cache.load('queue_information')
    if ts_data is None:
        old_queue_information = {}
    else:
        (_, old_queue_information) = ts_data

    stats = {
        'peruser': 0,
        'total': 0,
        'release': 0,
    }

    release_jobids = []

    for user, clusterdata in queue_information.items():
        oldclusterdata = old_queue_information.setdefault(user, {})
        totaluser = 0
        for cluster, data in clusterdata.items():
            olddata = oldclusterdata.setdefault(cluster, {})
            # DRMJID is supposed to be unique
            # get all oldjobids in one dict
            oldjobs = dict([(j['DRMJID'], j['_release']) for jt in olddata.values() for j in jt])
            for jobtype, jobs in data.items():
                removeids = []
                for idx, job in enumerate(jobs):
                    jid = job['DRMJID']
                    if jobtype in RELEASEJOB_SUPPORTED_HOLDTYPES:
                        totaluser += 1
                        release = max(oldjobs.get(jid, 0), 0) + 1
                        job['_release'] = release
                        stats['release'] = max(stats['release'], release)
                        release_jobids.append(jid)
                        # release the job
                        cmd = [m.clusters[cluster]['path'], '-u', jid]
                        logger.info("Releasing job %s cluster %s for the %s-th time." % (jid, cluster, release))
                        if dry_run:
                            logger.info("Dry run %s" % cmd)
                        else:
                            m._run_moab_command(cmd, cluster, [])
                    else:
                        # keep historical data, eg a previously released job could be idle now
                        # but keep the counter in case it gets held again
                        try:
                            release = oldjobs[jid]
                            job['_release'] = release
                        except KeyError:
                            # not previously in hold, remove it
                            removeids.append(idx)

                # remove the jobs (in reverse order)
                for remove_idx in removeids[::-1]:
                    jobs.pop(remove_idx)

                # cleanup
                if len(jobs) == 0:
                    data.pop(jobtype)

            # cleanup
            if len(data) == 0:
                clusterdata.pop(cluster)

        # cleanup
        if len(clusterdata) == 0:
            queue_information.pop(user)

        # update stats
        stats['peruser'] = max(stats['peruser'], totaluser)
        stats['total'] += totaluser

    logger.info("Release statistics: total jobs in hold %(total)s; max in hold per user %(peruser)s; max releases per job %(release)s" % stats)

    # update and close
    releasejob_cache.update('queue_information', queue_information, 0)
    releasejob_cache.close()

    return release_jobids, stats

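# The core caching pattern in process_hold() above: load the previous queue snapshot from the
# FileCache, carry the per-job '_release' counters forward, and store the updated snapshot for the
# next run. A stripped-down sketch of just that bookkeeping (hypothetical cache file and job dicts;
# FileCache import path assumed to be vsc.utils.cache):
from vsc.utils.cache import FileCache

def bump_release_counters(cache_file, current_jobs):
    """current_jobs maps job id -> job dict; returns the dict with '_release' counters carried forward."""
    cache = FileCache(cache_file)
    ts_data = cache.load('queue_information')
    old_jobs = {} if ts_data is None else ts_data[1]

    for jid, job in current_jobs.items():
        # one more release than whatever the previous run recorded for this job id
        job['_release'] = old_jobs.get(jid, {}).get('_release', 0) + 1

    cache.update('queue_information', current_jobs, 0)  # always overwrite
    cache.close()
    return current_jobs
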
def test_save_and_load(self, data, threshold):
    """Check if the loaded data is the same as the saved data."""
    # create a tempfilename
    (handle, filename) = tempfile.mkstemp()
    os.unlink(filename)

    cache = FileCache(filename)
    for (key, value) in data.items():
        cache.update(key, value, threshold)
    cache.close()

    now = time.time()
    new_cache = FileCache(filename)
    for key in data.keys():
        # load from the re-opened cache so the on-disk data is what gets verified
        info = new_cache.load(key)
        self.assertTrue(info is not None)
        (ts, value) = info
        self.assertTrue(value == data[key])
        self.assertTrue(ts <= now)
    new_cache.close()

    os.unlink(filename)

def test_save_and_load(self):
    """Check if the loaded data is the same as the saved data."""
    # test with random data
    data, threshold = get_rand_data()

    tempdir = tempfile.mkdtemp()
    # create a tempfilename
    (handle, filename) = tempfile.mkstemp(dir=tempdir)
    os.close(handle)
    # the tempdir is removed here and cleaned up again at the end, so FileCache is
    # expected to recreate the missing directory when it writes the cache file
    shutil.rmtree(tempdir)

    cache = FileCache(filename)
    for (key, value) in data.items():
        cache.update(key, value, threshold)
    cache.close()

    now = time.time()
    new_cache = FileCache(filename)
    for key in data.keys():
        # load from the re-opened cache so the on-disk data is what gets verified
        info = new_cache.load(key)
        self.assertTrue(info is not None)
        (ts, value) = info
        self.assertTrue(value == data[key])
        self.assertTrue(ts <= now)
    new_cache.close()

    shutil.rmtree(tempdir)