Example #1
    def master_resubmit(self, rjobs, backend=None):
        """ Resubmit (previously submitted) job. Configuration phase is skipped.
        Default implementation works is an emulated-bulk operation.
        If you override this method for bulk optimization then make sure that you call updateMasterJobStatus() on the master job,
        so the master job will be monitored by the monitoring loop.
        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception
        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0

        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info("resubmitting job %s to %s backend", fqid,
                            getName(sj.backend))
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    if backend is None:
                        result = b.resubmit()
                    else:
                        result = b.resubmit(backend=backend)
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(
                            IncompleteJobSubmissionError(
                                fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(logger, debug=isType(x, GangaException))
                    return handleError(
                        IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1
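The docstring above asks any bulk-optimized override to still call updateMasterJobStatus() on the master job. A minimal sketch of such an override is shown below; the _bulk_resubmit() helper and its id-based interface are hypothetical, and only the status updates and the finally block mirror the contract of the default implementation.

    def master_resubmit(self, rjobs, backend=None):
        """Bulk-optimized resubmission sketch (hypothetical _bulk_resubmit helper)."""
        try:
            ids = [sj.backend.id for sj in rjobs]   # collect backend ids for a single bulk call
            ok = self._bulk_resubmit(ids)           # hypothetical bulk request to the backend
            for sj in rjobs:
                sj.updateStatus('submitted' if ok else 'failed')
            return 1 if ok else 0
        finally:
            # as required by the docstring: keep the master job in the monitoring loop
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()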
Example #2
    def master_auto_resubmit(self, rjobs):
        '''A duplicate of IBackend.master_resubmit, hooked into auto-resubmission
        so that the monitoring server is used rather than the user server.
        Args:
            rjobs (list): the list of jobs which are to be auto-resubmitted'''

        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0

        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info("resubmitting job %s to %s backend", fqid,
                            getName(sj.backend))
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    result = b._resubmit()
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(
                            IncompleteJobSubmissionError(
                                fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(logger,
                                       debug=isType(x, GangaDiracError))
                    return handleError(
                        IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1
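Both resubmit loops above share the same error-escalation idiom: handleError() returns 0 quietly while nothing has been resubmitted yet, but raises once incomplete has been set, so a partially resubmitted master job is not silently reported as a clean failure. A self-contained illustration of that idiom, outside Ganga, follows; the names and the ResubmissionFailed exception are illustrative only.

class ResubmissionFailed(Exception):
    pass

def resubmit_all(subjob_results):
    """subjob_results: list of (fqid, succeeded) pairs, stand-ins for b.resubmit() calls."""
    incomplete = 0

    def handle_error(exc):
        if incomplete:          # something was already resubmitted: escalate
            raise exc
        return 0                # nothing resubmitted yet: report a plain failure

    for fqid, ok in subjob_results:
        if not ok:
            return handle_error(ResubmissionFailed(fqid))
        incomplete = 1          # at least one subjob is now resubmitted
    return 1

print(resubmit_all([('0.0', False)]))                 # 0: fails quietly
print(resubmit_all([('0.0', True), ('0.1', True)]))   # 1: all resubmitted
# resubmit_all([('0.0', True), ('0.1', False)])       # raises ResubmissionFailed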
Example #3
    def _parallel_submit(self, b, sj, sc, master_input_sandbox, fqid, logger):

        try:
            sj.updateStatus('submitting')
            if b.submit(sc, master_input_sandbox):
                sj.info.increment()
                return 1
            else:
                raise IncompleteJobSubmissionError(fqid, 'submission failed')
        except Exception as err:
            logger.error("Parallel Job Submission Failed: %s" % err)
            return 0
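This helper is designed to be pushed onto a worker thread per subjob (see the parallel_submit branch in Example #5). A standard-library analogue of that fan-out, not Ganga's queues, is sketched below with a dummy submit function.

from concurrent.futures import ThreadPoolExecutor

def submit_one(subjob_id):
    # stand-in for b.submit(sc, master_input_sandbox); returns 1 on success, 0 on failure
    return 1

with ThreadPoolExecutor(max_workers=4) as pool:
    results = list(pool.map(submit_one, range(10)))   # one task per subjob

print(all(results))   # overall success only if every submission returned 1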
Example #4
    def _parallel_submit(self, b, sj, sc, master_input_sandbox, fqid, logger):

        try:
            sj.updateStatus('submitting')
            if b.submit(sc, master_input_sandbox):
                sj.updateStatus('submitted')
                sj.info.increment()
            else:
                raise IncompleteJobSubmissionError(fqid, 'submission failed')
        except Exception as err:
            sj.updateStatus('failed')
            logger.error("Parallel Job Submission Failed: %s" % str(err))
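Unlike Example #3, this variant reports the outcome through updateStatus('submitted'/'failed') instead of a return value, so the caller has to poll subjob statuses until every one of them has left 'submitting' (Example #5 does exactly this with subjob_status_check). A hedged sketch of such a wait, with an illustrative timeout that is not part of the Ganga code, could look like this:

import time

def wait_for_submission(subjobs, poll=1.0, timeout=600.0):
    # settled states mirror the list checked in subjob_status_check() in Example #5
    settled = {'submitted', 'failed', 'completed', 'running', 'completing'}
    deadline = time.time() + timeout
    while time.time() < deadline:
        if all(sj.status in settled for sj in subjobs):
            # overall success only if no subjob ended up 'new' or 'failed'
            return all(sj.status not in ('new', 'failed') for sj in subjobs)
        time.sleep(poll)
    return False   # timed out while some subjobs were still submitting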
Example #5
    def master_submit(self,
                      rjobs,
                      subjobconfigs,
                      masterjobconfig,
                      keep_going=False,
                      parallel_submit=False):
        """  Submit   the  master  job  and  all   its  subjobs.   The
        masterjobconfig  is  shared,  individual  subjob  configs  are
        defined  in  subjobconfigs.   Submission  of  individual  jobs
        (not-split) also  always goes via  this method.  In  that case
        the subjobconfigs contains just one element - the job itself.

        The default  implementation of  this method emulates  the bulk
        submission  calling  a submit()  method  on individual  subjob
        objects.  If submission  of any of the subjobs  fails then the
        whole   process  is  aborted   with  IncompleteSubmissionError
        exception. The subjobs which  have already been submitted stay
        submitted.

        The default implementation does not process the masterjobconfig.
        Therefore this method may be overriden in the derived class
        in the following way:

        def master_submit(self,masterjobconfig,subjobconfigs,keep_going):
           ... 
           do_some_processsing_of(masterjobconfig)
           ...
           return IBackend.master_submit(self,subjobconfigs,masterjobconfig,keep_joing)


        Implementation note: we set keep_going to be optional in the
        signature of IBackend.master_submit() to allow the existing
        backend implementations which do not support keep_going=True
        and which at some point may call IBackend.master_submit() to
        work without change. It may sometimes be non-trivial to enable
        support for keep_going=True in some backends, even if the
        finally call IBackend.master_submit(). Therefore it is left to
        the decision of backend developer to explicitly enable the
        support for keep_going flag.

        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception

        logger.debug("SubJobConfigs: %s" % len(subjobconfigs))
        logger.debug("rjobs: %s" % len(rjobs))
        assert (implies(rjobs, len(subjobconfigs) == len(rjobs)))

        incomplete = 0
        incomplete_subjobs = []

        def handleError(x):
            if keep_going:
                incomplete_subjobs.append(fqid)
                return False
            else:
                if incomplete:
                    raise x
                else:
                    return True

        master_input_sandbox = self.master_prepare(masterjobconfig)

        if parallel_submit:

            from Ganga.GPI import queues

            threads_before = queues.totalNumIntThreads()

            for sc, sj in zip(subjobconfigs, rjobs):

                fqid = sj.getFQID('.')
                b = sj.backend
                # FIXME would be nice to move this to the internal threads not user ones
                #from Ganga.GPIDev.Base.Proxy import stripProxy
                #all_queues = stripProxy(queues)
                #all_queues._addSystem( self._parallel_submit, ( b, sj, sc, master_input_sandbox, fqid, logger ) )
                queues._monitoring_threadpool.add_function(
                    self._parallel_submit,
                    (b, sj, sc, master_input_sandbox, fqid, logger))

            def subjob_status_check(rjobs):
                has_submitted = True
                for sj in rjobs:
                    if sj.status not in [
                            "submitted", "failed", "completed", "running",
                            "completing"
                    ]:
                        has_submitted = False
                        break
                return has_submitted

            import time
            while not subjob_status_check(rjobs):
                time.sleep(1.)

            for i in rjobs:
                if i.status in ["new", "failed"]:
                    return 0
            return 1

        for sc, sj in zip(subjobconfigs, rjobs):

            fqid = sj.getFQID('.')
            logger.info("submitting job %s to %s backend", fqid,
                        getName(sj.backend))
            try:
                b = stripProxy(sj.backend)
                sj.updateStatus('submitting')
                if b.submit(sc, master_input_sandbox):
                    sj.updateStatus('submitted')
                    # sj._commit() # PENDING: TEMPORARY DISABLED
                    incomplete = 1
                    stripProxy(sj.info).increment()
                else:
                    if handleError(
                            IncompleteJobSubmissionError(
                                fqid, 'submission failed')):
                        return 0
            except Exception as x:
                #sj.updateStatus('new')
                if isType(x, GangaException):
                    logger.error(str(x))
                    log_user_exception(logger, debug=True)
                else:
                    log_user_exception(logger, debug=False)
                if handleError(IncompleteJobSubmissionError(fqid, str(x))):
                    return 0

        if incomplete_subjobs:
            raise IncompleteJobSubmissionError(incomplete_subjobs,
                                               'submission failed')

        return 1
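The docstring of master_submit() describes how a derived backend can process masterjobconfig itself and then delegate to the default implementation. A minimal sketch of that pattern, with a hypothetical MyBackend class and _prepare_shared_area() helper:

class MyBackend(IBackend):

    def master_submit(self, rjobs, subjobconfigs, masterjobconfig,
                      keep_going=False, parallel_submit=False):
        # hypothetical backend-specific handling of the shared master config
        self._prepare_shared_area(masterjobconfig)
        # delegate the per-subjob submission loop to the default implementation
        return IBackend.master_submit(self, rjobs, subjobconfigs,
                                      masterjobconfig, keep_going,
                                      parallel_submit)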