Example #1
0
    def global_lock_setup(self):
        """Create the on-disk global lock and remember its path in self.lockfn.

        Without AFS a plain file ``<sdir>/global_lock`` is created if it is
        missing; failures are reported as RepositoryError.
        On AFS a directory ``<sdir>/global_lock.afs`` holding an empty
        ``lock_file`` is created instead; any failure there is only logged
        at debug level.
        """
        if not self.afs:
            try:
                self.lockfn = os.path.join(self.sdir, "global_lock")
                if not os.path.isfile(self.lockfn):
                    # Opening for writing is only a way to create the file
                    # (does not interfere with existing sessions)
                    with open(self.lockfn, "w"):
                        pass
            except IOError as io_err:
                raise RepositoryError(
                    self.repo,
                    "Could not create lock file '%s': %s" % (self.lockfn, io_err))
            except OSError as os_err:
                raise RepositoryError(
                    self.repo,
                    "Could not open lock file '%s': %s" % (self.lockfn, os_err))
            return

        # AFS: the lock is a directory with a marker file inside it
        self.lockfn = os.path.join(self.sdir, "global_lock")
        afs_dir = str(self.lockfn) + '.afs'
        marker = os.path.join(afs_dir, "lock_file")
        try:
            if not os.path.exists(afs_dir):
                os.makedirs(afs_dir)
            if not os.path.isfile(marker):
                with open(marker, "w"):
                    pass
        except Exception as err:
            logger.debug("Global Lock Setup Error: %s" % err)
Example #2
0
def test_all_exceptions(gpi):
    """Instantiate each Ganga exception type and check its string form."""

    import Ganga.Core.exceptions
    message = "My Test Error"

    # Exception types that are constructed from the message text alone
    simple_errors = (
        "GangaException", "GangaFileError", "PluginError",
        "ApplicationConfigurationError", "ApplicationPrepareError",
        "IncompleteJobSubmissionError", "IncompleteKillError",
        "JobManagerError", "GangaAttributeError", "GangaValueError",
        "GangaIOError", "SplitterError", "ProtectedAttributeError",
        "ReadOnlyObjectError", "TypeMismatchError", "SchemaError",
        "SchemaVersionError", "CredentialsError", "CredentialRenewalError",
        "InvalidCredentialError", "ExpiredCredentialError",
    )
    for error_name in simple_errors:
        error_class = getattr(Ganga.Core.exceptions, error_name)
        instance = error_class(message)
        assert message in str(instance)

    # BackendError takes the backend name in front of the message
    from Ganga.Core.exceptions import BackendError
    backend_err = BackendError("TestBackend", message)
    rendered = str(backend_err)
    assert "TestBackend" in rendered
    assert message in str(backend_err)

    # InaccessibleObjectError takes a repository, an object id and a cause
    from Ganga.Core.exceptions import InaccessibleObjectError, JobManagerError
    from Ganga.Core.GangaRepository import getRegistry
    cause = JobManagerError("My JobManagerError")
    inaccessible = InaccessibleObjectError(
        getRegistry('jobs').repository, 0, cause)
    assert "jobs" in str(inaccessible)
    assert "#0" in str(inaccessible)
    assert "My JobManagerError" in str(inaccessible)

    # RepositoryError is only constructed here; nothing is asserted on it
    from Ganga.Core.exceptions import RepositoryError
    jobs_repo = getRegistry('jobs').repository
    RepositoryError(jobs_repo, message)

    # Construct another to check the except clause in the exception is called
    RepositoryError(getRegistry('jobs').repository, message)
Example #3
0
 def session_read(self, fn):
     """ Load the set of IDs stored in the session file fn.
         The caller MUST hold the global lock; when not on AFS the file is
         additionally share-locked with lockf while it is read.
         Returns an empty set if the file is missing or cannot be unpickled.
         Raises RepositoryError on any other OSError while accessing the file """
     max_bytes = 1048576  # read at most 1 MB (more than enough)
     try:
         # Opening may fail because the file was deleted meanwhile (that is OK)
         fd = None
         try:
             fd = os.open(fn, os.O_RDWR)
             os.lseek(fd, 0, os.SEEK_SET)
             if not self.afs:  # additional locking for NFS
                 fcntl.lockf(fd, fcntl.LOCK_SH)
             try:
                 raw = os.read(fd, max_bytes)
                 return pickle.loads(raw)
             except Exception as x:
                 # Unreadable content is reported, then treated as "no locks"
                 logger.warning(
                     "corrupt or inaccessible session file '%s' - ignoring it (Exception %s %s)."
                     % (fn, getName(x), x))
         finally:
             if fd is not None:
                 if not self.afs:  # additional locking for NFS
                     fcntl.lockf(fd, fcntl.LOCK_UN)
                 os.close(fd)
     except OSError as x:
         missing = (x.errno == errno.ENOENT)
         if not missing:
             raise RepositoryError(
                 self.repo,
                 "Error on session file access '%s': %s" % (fn, x))
     return set()
Example #4
0
    def afs_lock_require(self):
        """Acquire the AFS flavour of the global lock.

        Works on the directory ``str(self.lockfn) + '.afs'`` and the
        ``lock_file`` inside it: the file is unlinked (retrying every 0.1 s
        until that succeeds), the directory ACL is changed with ``fs setacl``
        and the file is then re-created.

        Raises:
            RepositoryError: if an IOError escapes the steps above.
        """
        try:
            lock_path = str(self.lockfn) + '.afs'
            lock_file = os.path.join(lock_path, "lock_file")

            def clean_path():
                # If the lock file's ctime is more than 10 seconds away from
                # the current time, reset the ACL on the lock directory to
                # "rlidwka" for the current user.
                # NOTE(review): presumably this recovers a stale lock left by
                # a dead session - confirm.
                oldtime = os.stat(lock_file).st_ctime
                nowtime = time.time()
                if abs(int(nowtime) - oldtime) > 10:
                    #logger.debug( "cleaning global lock" )
                    os.system("fs setacl %s %s rlidwka" %
                              (quote(lock_path), getpass.getuser()))

            # Keep trying until the unlink succeeds; every failure (including
            # a missing lock file) is logged at debug level and retried
            # after 0.1 s. There is no upper bound on the number of retries.
            while True:
                try:
                    if os.path.isfile(lock_file):
                        clean_path()
                    os.unlink(lock_file)
                    break
                except Exception as err:
                    logger.debug("Global Lock aquire Exception: %s" % err)
                    time.sleep(0.1)

            # Same ACL string as in clean_path() but without the "d".
            # NOTE(review): presumably this withdraws delete permission so
            # that other sessions fail in the unlink above - confirm against
            # the AFS documentation.
            os.system("fs setacl %s %s rliwka" %
                      (quote(lock_path), getpass.getuser()))

            # Re-create the (empty) lock file, repeating until it is visible
            while not os.path.isfile(lock_file):
                with open(lock_file, "w"):
                    pass

            #logger.debug("global capture")
        except IOError as x:
            raise RepositoryError(self.repo,
                                  "IOError on AFS global lock: %s" % (x, ))
Example #5
0
 def numberRepos(self):
     """Return the number of registered session files.

     Raises:
         RepositoryError: if self.fns and self.repos differ in length.
     """
     # Checked explicitly: an `assert` is removed when Python runs with -O,
     # which would silently disable this consistency check.
     if len(self.fns) != len(self.repos):
         # No single repository is responsible here, so None is passed in
         # the repository slot (RepositoryError takes (repo, message)).
         raise RepositoryError(
             None, "Number of repos is inconsistent with lock files")
     return len(self.fns)
Example #6
0
 def make_new_ids(self, n):
     """ Reserve the next n free ids, lock them for this session and return
         them as a list.
         Raise RepositoryError on fatal error"""
     # Bring the counter up to date with what is stored on disk
     try:
         start = self.cnt_read()
     except ValueError:
         logger.warning(
             "Corrupt job counter (possibly due to crash of another session)! Trying to recover..."
         )
         start = self.count
     except OSError:
         raise RepositoryError(
             self.repo,
             "Job counter deleted! External modification to repository!")

     # Never go below the value this session already knows about
     if not start >= self.count:
         logger.warning(
             "Internal counter increased - probably the count file was deleted."
         )
         start = self.count

     # Ids at or above the counter may already be locked, e.g. after
     # force_ids was used (old repository imports)
     if self.locked:
         highest_locked = max(self.locked)
         if highest_locked >= start:
             start = highest_locked + 1

     stop = start + n
     new_ids = list(range(start, stop))
     self.locked.update(new_ids)
     self.count = stop
     self.cnt_write()
     self.session_write()
     return new_ids
Example #7
0
    def lock_ids(self, ids):
        """Lock those of the given ids that no other session holds.

        Every file in self.sdir whose name ends with ``self.name + ".locks"``
        is read through session_read(), except this session's own file
        (self.fn). Ids found there are dropped from the request, the rest is
        added to self.locked and written out with session_write().

        Returns the list of ids that were locked.
        Raises RepositoryError if the session directory cannot be listed.
        """
        wanted = set(ids)
        try:
            entries = os.listdir(self.sdir)
        except OSError:
            raise RepositoryError(
                self.repo,
                "Could not list session directory '%s'!" % (self.sdir))

        # Collect everything locked by the other sessions
        taken = set()
        for entry in entries:
            if not entry.endswith(self.name + ".locks"):
                continue
            session_path = os.path.join(self.sdir, entry)
            if session_path != self.fn:
                taken.update(self.session_read(session_path))

        wanted -= taken
        self.locked.update(wanted)
        self.session_write()
        return list(wanted)
Example #8
0
    def _add(self, obj, force_index=None):
        """ Add an object to the registry and assign an ID to it.
        Use force_index to set the index (for example for metadata). This overwrites existing objects!
        Args:
            obj (GangaObject): This is the object which is to be added to this Registry/Repo
            force_index (int): This is the index which we will give the object, None = auto-assign
        Returns:
            The id under which the object was stored.
        Raises:
            RepositoryError: if the registry has not been started.
            RegistryLockError: if force_index is given but could not be locked.
        """
        logger.debug("_add")

        if self.hasStarted() is not True:
            # RepositoryError takes (repository, message); the message used to
            # be passed in the repository slot. getattr() guards against the
            # attribute not being set on a registry that never started.
            raise RepositoryError(
                getattr(self, "repository", None),
                "Cannot add objects to a disconnected repository!")

        if force_index is None:
            ids = self.repository.add([obj])
        else:
            # The requested id must be lockable before it may be (re)used
            if len(self.repository.lock([force_index])) == 0:
                raise RegistryLockError("Could not lock '%s' id #%i for a new object!" % (self.name, force_index))
            ids = self.repository.add([obj], [force_index])

        obj._setRegistry(self)
        obj._registry_locked = True

        self.repository.flush(ids)

        return ids[0]
Example #9
0
 def mkdir(self, dn):
     """Create directory dn (including parents); an existing one is accepted.
     Any other OSError is re-raised as RepositoryError."""
     try:
         os.makedirs(dn)
     except OSError as err:
         already_exists = (err.errno == errno.EEXIST)
         if not already_exists:
             raise RepositoryError(self.repo,
                                   "OSError on directory create: %s" % err)
Example #10
0
    def cnt_read(self):
        """ Tries to read the counter file and returns its value.
            If the file's ctime equals the one recorded in
            self.last_count_access, the cached value is returned without
            opening the file; otherwise the file is read and the cache updated.
            Raises ValueError (invalid contents)
            Raises OSError (count file does not exist)
            Raises RepositoryError (fatal)
            """
        try:
            cached = self.last_count_access
            if cached is not None:
                if os.stat(self.cntfn).st_ctime == cached.time:
                    # File unchanged since the last access: reuse the value
                    return cached.val

            fd = None
            try:
                fd = os.open(self.cntfn, os.O_RDWR)
                if not self.afs:  # additional locking for NFS
                    fcntl.lockf(fd, fcntl.LOCK_SH)
                # 100 bytes should be enough for any ID. Can raise ValueError
                value = int(os.read(fd, 100).split("\n")[0])
            finally:
                if fd is not None:
                    if not self.afs:  # additional locking for NFS
                        fcntl.lockf(fd, fcntl.LOCK_UN)
                    os.close(fd)

            # Only reached after a successful read. Done after the finally
            # block rather than returning from inside it, because a `return`
            # in `finally` discards any exception that is in flight.
            self.last_count_access = SessionLockManager.LastCountAccess(
                os.stat(self.cntfn).st_ctime, value)
            return value

        except OSError as x:
            if x.errno != errno.ENOENT:
                raise RepositoryError(
                    self.repo,
                    "OSError on count file '%s' read: %s" % (self.cntfn, x))
            else:
                # This can be a recoverable error, depending on where it occurs
                raise
        except IOError as x:
            raise RepositoryError(
                self.repo,
                "Locking error on count file '%s' read: %s" % (self.cntfn, x))
Example #11
0
    def afs_lock_release(self):
        """Release the AFS global lock.

        Runs ``fs setacl`` to set the ACL "rlidwka" for the current user on
        the directory ``str(self.lockfn) + '.afs'``.
        Raises RepositoryError if an IOError occurs.
        """
        acl_dir = str(self.lockfn) + '.afs'
        try:
            command = "fs setacl %s %s rlidwka" % (quote(acl_dir),
                                                   getpass.getuser())
            os.system(command)
        except IOError as err:
            raise RepositoryError(self.repo,
                                  "IOError on AFS global lock: %s" % (err, ))
Example #12
0
    def removeRepo(self, fn, repo):
        """Unregister a session file and its repository.

        Args:
            fn: session file name to remove from self.fns
            repo: repository to remove from self.repos

        Raises:
            ValueError: if fn or repo is not registered (from list.remove).
            RepositoryError: if the two lists end up with different lengths.
        """
        self.fns.remove(fn)
        self.repos.remove(repo)

        # Checked explicitly: an `assert` is removed when Python runs with -O,
        # which would silently disable this consistency check.
        if len(self.fns) != len(self.repos):
            # RepositoryError takes (repo, message); no single repository is
            # responsible here, so None is passed in the repository slot.
            raise RepositoryError(
                None, "Number of repos is inconsistent after removing repo!")
Example #13
0
 def cnt_write(self):
     """ Store self.count in the counter file and refresh the cached
         (ctime, value) pair in self.last_count_access.
         The global lock MUST be held for this function to work correctly
         Raises RepositoryError if the count file is missing or inaccessible """
     try:
         # A failure here must shut the repository down (corruption
         # possible)
         fd = None
         try:
             fd = os.open(self.cntfn, os.O_RDWR)
             if not self.afs:
                 fcntl.lockf(fd, fcntl.LOCK_EX)
             os.write(fd, str(self.count) + "\n")
             if not self.afs:
                 fcntl.lockf(fd, fcntl.LOCK_UN)
         finally:
             if fd is not None:
                 os.close(fd)
     except OSError as x:
         if x.errno == errno.ENOENT:
             raise RepositoryError(
                 self.repo,
                 "Count file '%s' not found! Repository was modified externally!"
                 % (self.cntfn))
         raise RepositoryError(
             self.repo,
             "OSError on count file '%s' write: %s" % (self.cntfn, x))
     except IOError as x:
         raise RepositoryError(
             self.repo,
             "Locking error on count file '%s' write: %s" % (self.cntfn, x))
     else:
         # Only after a successful write: remember what is now on disk
         self.last_count_access = SessionLockManager.LastCountAccess(
             os.stat(self.cntfn).st_ctime, self.count)
Example #14
0
 def check(self):
     """Verify that the count file and the session lock files are consistent.

     Reads the counter from self.cntfn, then unpickles every file in
     self.sdir whose name ends with ``self.name + ".locks"``. Session files
     that cannot be read are reported with a warning and skipped.

     Raises:
         RepositoryError: if the stored counter is lower than self.count, or
             if a name appears in more than one session file.
     """
     with open(self.cntfn) as f:
         newcount = int(f.readline())
     # Checked explicitly: an `assert` is removed when Python runs with -O.
     if not newcount >= self.count:
         raise RepositoryError(
             self.repo,
             "Count in lock file: %s is now inconsistent!" % self.cntfn)
     sessions = os.listdir(self.sdir)
     prevnames = set()
     for session in sessions:
         if not session.endswith(self.name + ".locks"):
             continue
         fd = None
         try:
             sf = self._path_helper(session)
             if not self.afs:
                 fd = os.open(sf, os.O_RDWR)
                 fcntl.lockf(fd, fcntl.LOCK_SH)  # ONLY NFS
             # Pickles are binary data, so the file is opened in binary mode
             with open(sf, "rb") as sf_file:
                 names = pickle.load(sf_file)
             if not self.afs and fd is not None:
                 fcntl.lockf(fd, fcntl.LOCK_UN)  # ONLY NFS
         except Exception as x:
             logger.warning("CHECKER: session file %s corrupted: %s %s" %
                            (session, getName(x), x))
             continue
         finally:
             if fd is not None:
                 os.close(fd)
         overlap = names & prevnames
         if overlap:
             # The intersection is formatted into the message. The previous
             # `"..." + names & prevnames` parsed as `("..." + names) &
             # prevnames` and raised TypeError instead of logging.
             logger.error("Double-locked stuff: %s" % str(overlap))
             raise RepositoryError(
                 self.repo,
                 "Lock file has double-locked objects: %s" % str(overlap))
         prevnames.update(names)
Example #15
0
 def session_write(self):
     """ Persist this session's set of locked ids (self.locked) into its
         session file (self.fn).
         The global lock MUST be held for this function to work, although when
         not on AFS the file is additionally locked with lockf
         Raises RepositoryError if session file is inaccessible """
     try:
         # A failure here must shut the repository down (corruption
         # possible)
         fd = None
         try:
             fd = os.open(self.fn, os.O_RDWR)
             use_lockf = not self.afs
             if use_lockf:
                 fcntl.lockf(fd, fcntl.LOCK_EX)
             os.write(fd, pickle.dumps(self.locked))
             if use_lockf:
                 fcntl.lockf(fd, fcntl.LOCK_UN)
             os.fsync(fd)
         finally:
             if fd is not None:
                 os.close(fd)
     except OSError as x:
         if x.errno == errno.ENOENT:
             # Our own session file is gone
             raise RepositoryError(
                 self.repo,
                 "SessionWrite: Own session file not found! Possibly deleted by another ganga session.\n\
                                 Possible reasons could be that this computer has a very high load, or that the system clocks on computers running Ganga are not synchronized.\n\
                                 On computers with very high load and on network filesystems, try to avoid running concurrent ganga sessions for long.\n '%s' : %s"
                 % (self.fn, x))
         raise RepositoryError(
             self.repo,
             "Error on session file access '%s': %s" % (self.fn, x))
     except IOError as x:
         raise RepositoryError(
             self.repo,
             "Error on session file locking '%s': %s" % (self.fn, x))
Example #16
0
def setupGlobalLockRef(session_name, sdir, gfn, _on_afs):
    """Return the module-wide SessionLockRefresher, creating it on first use.

    On the first call the global session file gfn is created (it must not
    exist yet), registered via registerGlobalSessionFile() and a new
    refresher is started. Later calls return the existing refresher.

    Raises RepositoryError if the session file cannot be created.
    """
    global session_lock_refresher
    if session_lock_refresher is not None:
        return session_lock_refresher

    try:
        # The file is opened and closed straight away: this only creates it
        new_fd = os.open(gfn, os.O_EXCL | os.O_CREAT | os.O_WRONLY)
        os.close(new_fd)
        registerGlobalSessionFile(gfn)
    except OSError as err:
        logger.debug("Startup Lock Refresher Exception: %s" % err)
        raise RepositoryError(
            None, "Error on session file '%s' creation: %s" % (gfn, err))

    session_lock_refresher = SessionLockRefresher(session_name, sdir, gfn,
                                                  None, _on_afs)
    session_lock_refresher.start()
    return session_lock_refresher
Example #17
0
 def updateLocksNow(self, index, failCount=0):
     """Refresh the timestamp of the session file self.fns[index].

     The file is touched through the shell and its new ctime is returned.

     Args:
         index: position of the session file in self.fns / self.repos
         failCount: number of attempts that already failed (used by the
             recursive retry below)

     Returns:
         The file's ctime after the touch, or -999. if the update failed.

     Raises:
         RepositoryError: the file does not exist and self.repos[index]
             is not None.
         GangaException: the file does not exist and self.repos[index]
             is None.
     """
     this_index_file = self.fns[index]
     now = None
     try:
         # NOTE: oldnow is not used afterwards; the stat call still raises
         # OSError before the touch if the file is missing.
         oldnow = os.stat(this_index_file).st_ctime
         os.system('touch "%s"' % this_index_file)
         now = os.stat(
             this_index_file).st_ctime  # os.stat(self.fn).st_ctime
     except OSError as x:
         if x.errno != errno.ENOENT:
             # The file exists but could not be stat'ed
             logger.debug(
                 "Session file timestamp could not be updated! Locks could be lost!"
             )
             if now is None and failCount < 4:
                 # Retry after 0.5 s by calling this method again with an
                 # incremented failure count
                 try:
                     logger.debug(
                         "Attempting to lock file again, unknown error:\n'%s'"
                         % x)
                     import time
                     time.sleep(0.5)
                     failcount = failCount + 1
                     now = self.updateLocksNow(index, failcount)
                 except Exception as err:
                     # Anything raised by the retry (including the exceptions
                     # from the "file missing" branch) ends up here
                     now = -999.
                     logger.debug(
                         "Received another type of exception, failing to update lockfile: %s"
                         % this_index_file)
             else:
                 # Retries exhausted: give up and return the failure marker
                 logger.warning("Failed to update lock file: %s 5 times." %
                                this_index_file)
                 logger.warning(
                     "This could be due to a filesystem problem, or multiple versions of ganga trying to access the same file"
                 )
                 now = -999.
         else:
             # ENOENT: the session file has disappeared
             if self.repos[index] != None:
                 raise RepositoryError(
                     self.repos[index],
                     "[SessionFileUpdate] Run: Own session file not found! Possibly deleted by another ganga session.\n\
                 Possible reasons could be that this computer has a very high load, or that the system clocks on computers running Ganga are not synchronized.\n\
                 On computers with very high load and on network filesystems, try to avoid running concurrent ganga sessions for long.\n '%s' : %s"
                     % (this_index_file, x))
             else:
                 from Ganga.Core.exceptions import GangaException
                 raise GangaException(
                     "Error Opening global .session file for this session: %s"
                     % this_index_file)
     return now
Example #18
0
 def finish_startup(self):
     """Create the counter file if it is missing, then load the counter.

     self.count is raised to the value stored in the counter file when that
     value is larger. A counter file with invalid contents is logged as an
     error and self.count is left unchanged.

     Raises:
         RepositoryError: if creating the counter file fails with an OSError
             other than EEXIST.
     """
     # setup counter file if it does not exist, read it if it does
     if not os.path.exists(self.cntfn):
         fd = None
         try:
             # O_EXCL makes the creation fail with EEXIST if the file already
             # exists; that case is tolerated below
             fd = os.open(self.cntfn, os.O_EXCL | os.O_CREAT | os.O_WRONLY)
             os.write(fd, "0")
         except OSError as x:
             if x.errno != errno.EEXIST:
                 raise RepositoryError(
                     self.repo, "OSError on count file create: %s" % x)
         finally:
             if fd is not None:
                 os.close(fd)
     try:
         self.count = max(self.count, self.cnt_read())
     # `except ... as err` like every other handler in this file; the comma
     # form is a syntax error on Python 3.
     except ValueError as err:
         logger.debug("Startup ValueError Exception: %s" % err)
         logger.error("Corrupt count file '%s'! Trying to recover..." %
                      (self.cntfn))
Example #19
0
 def add(self, objs, force_ids=None):
     """
     Add the objects to the main dict
     Args:
         objs (list): Objects we want to store in memory
         force_ids (list, None): IDs to assign to the objects, None for auto-assign
     Returns:
         list: the ids under which the objects were stored, in the order of objs
     Raises:
         RepositoryError: if force_ids is given and its length differs from objs
     """
     # Checked explicitly: an `assert` is removed when Python runs with -O,
     # which would let mismatched lists through to the indexing below.
     if not (force_ids is None or len(force_ids) == len(objs)):
         # RepositoryError takes (repo, message); this object is the
         # repository the error refers to.
         raise RepositoryError(
             self,
             "Inconsistent number of objects and ids, can't add to Repository"
         )
     ids = []
     for position, obj in enumerate(objs):
         # An empty force_ids behaves like None: ids are auto-assigned
         new_id = force_ids[position] if force_ids else self._next_id
         self._internal_setitem__(new_id, obj)
         ids.append(new_id)
         # Keep the next automatic id above every id handed out so far
         self._next_id = max(self._next_id + 1, new_id + 1)
     return ids
Example #20
0
                         (self.cntfn))
        except OSError as err:
            logger.debug("Startup OSError Exception: %s" % err)
            raise RepositoryError(
                self.repo, "OSError on count file '%s' access!" % (self.cntfn))
        self.cnt_write()
        # Setup session file
        fd = None
        try:
            fd = os.open(self.fn, os.O_EXCL | os.O_CREAT | os.O_WRONLY)
            os.write(fd, pickle.dumps(set()))
            registerGlobalSessionFile(self.fn)
        except OSError as err:
            logger.debug("Startup Session Exception: %s" % err)
            raise RepositoryError(
                self.repo,
                "Error on session file '%s' creation: %s" % (self.fn, err))
        finally:
            if fd is not None:
                os.close(fd)

        setupGlobalLockRef(self.session_name, self.sdir, self.gfn, self.afs)

        session_lock_refresher.addRepo(self.fn, self.repo)
        self.session_write()

    def updateNow(self):
        """Forward to updateNow() of the module-level session_lock_refresher."""
        session_lock_refresher.updateNow()

    @synchronised
    def shutdown(self):
Example #21
0
class SessionLockManager(object):
    """ Class with thread that keeps a global lock file that synchronizes
    ID and counter access across Ganga sessions.
    DEVELOPER WARNING: On NFS, files that are not locked with lockf (NOT flock) will 
    NOT be synchronized across clients, even if a global lock file is used!
    Interface:
        * startup() starts the session, automatically called on init
        * shutdown() stops the thread, FREES ALL LOCKS
        * make_new_ids(n) returns n new (locked) ids
        * lock_ids(ids) returns the ids that were successfully locked
        * release_ids(ids) returns the ids that were successfully released (now: all)
    All access to an instance of this class MUST be synchronized!
    Should ONLY raise RepositoryError (if possibly-corrupting errors are found)
    """

    LastCountAccess = namedtuple('LastCountAccess', ['time', 'val'])

    def mkdir(self, dn):
        """Create directory dn with all parents; tolerate it already existing.
        Any other OSError is turned into a RepositoryError."""
        try:
            os.makedirs(dn)
        except OSError as err:
            if err.errno == errno.EEXIST:
                return
            raise RepositoryError(self.repo,
                                  "OSError on directory create: %s" % err)

    def __init__(self, repo, root, name, minimum_count=0):
        """Set up paths and in-memory state for one repository's locks.

        Args:
            repo: repository object, passed on to RepositoryError
            root: base directory (created if missing)
            name: name used for the counter directory and session file
            minimum_count: initial value of the id counter
        """
        self.repo = repo
        self.mkdir(root)
        realpath = os.path.realpath(root)

        # All lock managers of one process share a session name: reuse the
        # refresher's name if it exists, otherwise build one from the
        # hostname (os.uname()[1]), the current date/time and the PID.
        # TODO: Perhaps put the username here?
        if session_lock_refresher is not None:
            session_name = session_lock_refresher.session_name
        else:
            stamp = datetime.datetime.now().strftime("%H.%M_%A_%d_%B_%Y")
            name_parts = [
                os.uname()[1],
                str(stamp), "PID",
                str(os.getpid()), "session"
            ]
            session_name = ".".join(name_parts)

        sessions_dir = os.path.join(realpath, "sessions")
        self.sdir = sessions_dir
        self.gfn = os.path.join(sessions_dir, session_name)
        self.fn = os.path.join(sessions_dir,
                               session_name + "." + name + ".locks")
        self.cntfn = os.path.join(realpath, name, "cnt")

        # AFS is detected from the resolved path prefix
        self.afs = realpath.startswith("/afs")
        self.locked = set()
        self.count = minimum_count
        self.session_name = session_name
        self.name = name
        self.realpath = realpath
        self._lock = threading.RLock()
        self.last_count_access = None

    @synchronised
    def startup(self):
        """Start the session: drop the cached counter, make sure the
        "sessions" and per-repository directories exist, set up the global
        lock and run finish_startup()."""
        self.last_count_access = None

        # Ensure directories exist
        for subdir in ("sessions", self.name):
            self.mkdir(os.path.join(self.realpath, subdir))

        # setup global lock
        self.global_lock_setup()

        self.finish_startup()

    @global_disk_lock
    def finish_startup(self):
        # setup counter file if it does not exist, read it if it does
        if not os.path.exists(self.cntfn):
            fd = None
            try:
                fd = os.open(self.cntfn, os.O_EXCL | os.O_CREAT | os.O_WRONLY)
                os.write(fd, "0")
            except OSError as x:
                if x.errno != errno.EEXIST:
                    raise RepositoryError(
                        self.repo, "OSError on count file create: %s" % x)
            finally:
                if fd is not None:
                    os.close(fd)
        try:
            self.count = max(self.count, self.cnt_read())
        except ValueError, err:
            logger.debug("Startup ValueError Exception: %s" % err)
            logger.error("Corrupt count file '%s'! Trying to recover..." %
                         (self.cntfn))
        except OSError as err:
            logger.debug("Startup OSError Exception: %s" % err)
            raise RepositoryError(
                self.repo, "OSError on count file '%s' access!" % (self.cntfn))