Example #1
    def master_auto_resubmit(self, rjobs):
        '''Duplicate of IBackend.master_resubmit, but hooked into auto-resubmission
        so that the monitoring server is used rather than the user server.'''
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception
        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0
        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info("resubmitting job %s to %s backend", fqid, getName(sj.backend))
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    result = b._resubmit()
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(IncompleteJobSubmissionError(fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(logger, debug=isType(x, GangaException))
                    return handleError(IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1
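
A note on the handleError/incomplete pattern above: a failure before any subjob has been resubmitted returns 0, while a failure after at least one success raises, so the caller can tell "nothing happened" apart from "partially done". A minimal, self-contained sketch of the same control flow (generic names, no Ganga dependencies):

class IncompleteSubmissionError(Exception):
    pass

def resubmit_all(jobs, resubmit_one):
    incomplete = False

    def handle_error(exc):
        if incomplete:
            raise exc   # partial progress: surface the error
        return 0        # nothing resubmitted yet: plain failure

    for job in jobs:
        try:
            if not resubmit_one(job):
                return handle_error(IncompleteSubmissionError(job))
            incomplete = True   # at least one job went through
        except IncompleteSubmissionError:
            raise               # raised by handle_error above
        except Exception as exc:
            return handle_error(IncompleteSubmissionError(str(exc)))
    return 1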
Example #2
def runAutoMerge(job, new_status):
    """Method to run the merge command."""
    
    result = False

    # we only run on master jobs (which have no parent)
    if job._getParent() is not None:
        return result
    
    allowed_states = ['completed','failed','killed']
    if new_status not in allowed_states:
        return result
    
    try:
        if job.merger:
            # always run on 'completed'; run on 'failed'/'killed' only if the ignorefailed flag is set
            if new_status == allowed_states[0] or job.merger.ignorefailed:

                # leave the output directory to the implementation (fix for http://savannah.cern.ch/bugs/?76445)
                sum_outputdir = None
                if job.merger.set_outputdir_for_automerge:
                    sum_outputdir = job.outputdir

                result = job.merger.merge(job.subjobs, sum_outputdir)

    except Exception:
        log_user_exception()
        raise
    
    return result
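
A minimal usage sketch: wiring runAutoMerge into a status-transition hook. The hook name on_job_status_change is hypothetical; only runAutoMerge's signature and gating behaviour come from the example above.

def on_job_status_change(job, new_status):   # hypothetical hook name
    # runAutoMerge returns False unless 'job' is a master job entering
    # 'completed', 'failed' or 'killed' and has a merger configured
    if runAutoMerge(job, new_status):
        print("auto-merge completed for job %s" % job.id)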
Example #3
def log_error():
    # 'x' and 'show_traceback' come from the enclosing scope (closure)
    log.error('Problem in the monitoring loop: %s', str(x))
    # if show_traceback:
    #     log.error("exception: ", exc_info=1)
    #     # log_unknown_exception()
    #     import traceback
    #     traceback.print_stack()
    if show_traceback:
        log_user_exception(log)
Example #4
    def submit(self, **opts):
        """Submission of a job.
        Called by: ganga client. """

        ret = {}
        for monService in self.monMonServices:
            try:
                monClass = str(monService.__class__)
                ret[monClass] = monService.submit(**opts)
            except Exception as e:
                # discard errors in initialization of monitoring services
                self._log(level="warning", msg="%s monitoring service failed in job *submit*" % monClass)
                from Ganga.Utility.logging import log_user_exception
                log_user_exception(self.logger)
Example #5
    def submit(self, **opts):
        """Submission of a job.
        Called by: ganga client. """

        ret = {}
        for monService in self.monMonServices:
            try:
                monClass = str(monService.__class__)
                ret[monClass] = monService.submit(**opts)
            except Exception as e:
                # discard errors in initialization of monitoring services
                self.logger.warning("%s monitoring service failed in job *submit*" % monClass)
                from Ganga.Utility.logging import log_user_exception
                log_user_exception(self.logger)
        return ret
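
Examples #4 and #5 are the same fan-out idiom: call the same method on every child service, log and swallow per-service failures so one bad service cannot block the rest, and collect results keyed by class name. A generic, dependency-free sketch of that idiom (fan_out is a hypothetical name):

import logging

logger = logging.getLogger(__name__)

def fan_out(services, method_name, **opts):
    """Call method_name on every service; a failure in one never blocks the rest."""
    results = {}
    for service in services:
        name = type(service).__name__
        try:
            results[name] = getattr(service, method_name)(**opts)
        except Exception:
            # mirror the examples: warn (with traceback) and keep going
            logger.warning("%s failed in *%s*", name, method_name, exc_info=True)
    return results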
Example #6
    def __init__(self, monClasses, jobInfos, configInfos):
        """Create a new composite monitoring service based on the lists of
        monitoring classes, jobs and configs (all the same length).

        If this is called in the Ganga client, i.e. from Ganga/GPIDev/MonitoringServices,
        then jobInfos is a list of Job (all the same), configInfos is a list of
        Config (specific to each monitoring class).

        If this is called on the worker node, i.e. from the text generated by
        getWrapperScriptConstructorText(), the jobInfos are dictionaries (specific
        to each monitoring class) and configInfos are dictionaries of effective
        config options (specific to each monitoring class).
        """

        if not (len(monClasses) == len(jobInfos) == len(configInfos)):
            raise Exception(
                "cannot create monitoring object: the lists of monitoring classes, jobs and configs are not the same length.")

        IMonitoringService.__init__(self, jobInfos, configInfos)

        # init the logger
        try:
            import Ganga.Utility.logging
            self.logger = Ganga.Utility.logging.getLogger()
        except ImportError:
            # on the worker node we don't have access to Ganga logging facilities
            # so we simply print out the log message
            # @see self._log()
            self.logger = None

        # init the monitoring services
        self.monMonServices = []
        for i in range(len(monClasses)):
            try:
                monClass = monClasses[i]
                # allow for existing monitoring classes which do not take
                # config_info in constructor
                if configInfos[i] is None:
                    monService = monClass(jobInfos[i])
                else:
                    monService = monClass(jobInfos[i], configInfos[i])
                self.monMonServices.append(monService)
            except Exception as e:
                # discard errors in initialization of monitoring services
                self._log(
                    level="warning", msg="Failed to init %s monitoring service...discarding it" % str(monClass))
                from Ganga.Utility.logging import log_user_exception
                log_user_exception(self.logger)
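
The index-based loop above can be written more compactly with zip; a behaviour-preserving sketch (build_services is a hypothetical free-function rendering of the same logic):

def build_services(mon_classes, job_infos, config_infos, logger):
    """Instantiate each monitoring class, discarding any that fail to construct."""
    services = []
    for mon_class, job_info, config_info in zip(mon_classes, job_infos, config_infos):
        try:
            # older monitoring classes take no config argument
            if config_info is None:
                services.append(mon_class(job_info))
            else:
                services.append(mon_class(job_info, config_info))
        except Exception:
            logger.warning("Failed to init %s monitoring service...discarding it", mon_class)
    return services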
Example #7
            def process(self, sj_info):
                my_sc = sj_info[0]
                my_sj = sj_info[1]

                try:
                    logger.debug("preparing job %s" % my_sj.getFQID('.'))
                    jdlpath = my_sj.backend.preparejob(my_sc, master_input_sandbox)

                    if (not jdlpath) or (not os.path.exists(jdlpath)):
                        raise GangaException('job %s not properly prepared' % my_sj.getFQID('.'))

                    self.__appendResult__( my_sj.id, jdlpath )
                    return True
                except Exception as x:
                    log_user_exception()
                    return False
Example #8
    def __init__(self, monClasses, jobInfos, configInfos):
        """Create a new composite monitoring service based on the lists of
        monitoring classes, jobs and configs (all the same length).

        If this is called in the Ganga client, i.e. from Ganga/GPIDev/MonitoringServices,
        then jobInfos is a list of Job (all the same), configInfos is a list of
        Config (specific to each monitoring class).

        If this is called on the worker node, i.e. from the text generated by
        getWrapperScriptConstructorText(), the jobInfos are dictionaries (specific
        to each monitoring class) and configInfos are dictionaries of effective
        config options (specific to each monitoring class).
        """

        if not (len(monClasses) == len(jobInfos) == len(configInfos)):
            raise Exception(
                "cannot create monitoring object: the lists of monitoring classes, jobs and configs are not the same length.")

        IMonitoringService.__init__(self, jobInfos, configInfos)

        # init the logger
        try:
            import Ganga.Utility.logging
            self.logger = Ganga.Utility.logging.getLogger()
        except ImportError:
            # on the worker node we don't have access to Ganga logging facilities
            # so we simply print out the log message
            self.logger = None

        # init the monitoring services
        self.monMonServices = []
        for i in range(len(monClasses)):
            try:
                monClass = monClasses[i]
                # allow for existing monitoring classes which do not take
                # config_info in constructor
                if configInfos[i] is None:
                    monService = monClass(jobInfos[i])
                else:
                    monService = monClass(jobInfos[i], configInfos[i])
                self.monMonServices.append(monService)
            except Exception as e:
                # discard errors in initialization of monitoring services
                self.logger.warning("Failed to init %s monitoring service...discarding it" % str(monClass))
                from Ganga.Utility.logging import log_user_exception
                log_user_exception(self.logger)
Example #9
    def master_resubmit(self, rjobs, backend=None):
        """ Resubmit (previously submitted) job. Configuration phase is skipped.
        Default implementation works as an emulated-bulk operation.
        If you override this method for bulk optimization then make sure that you call updateMasterJobStatus() on the master job,
        so the master job will be monitored by the monitoring loop.
        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception
        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0

        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info("resubmitting job %s to %s backend", fqid,
                            getName(sj.backend))
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    if backend is None:
                        result = b.resubmit()
                    else:
                        result = b.resubmit(backend=backend)
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(
                            IncompleteJobSubmissionError(
                                fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(logger, debug=isType(x, GangaException))
                    return handleError(
                        IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1
Example #10
            def process(self, sj_info):
                my_sc = sj_info[0]
                my_sj = sj_info[1]

                try:
                    logger.debug("preparing job %s" % my_sj.getFQID('.'))
                    jdlpath = my_sj.backend.preparejob(my_sc,
                                                       master_input_sandbox)

                    if (not jdlpath) or (not os.path.exists(jdlpath)):
                        raise GangaException('job %s not properly prepared' %
                                             my_sj.getFQID('.'))

                    self.__appendResult__(my_sj.id, jdlpath)
                    return True
                except Exception as x:
                    log_user_exception()
                    return False
Example #11
    def master_resubmit(self, rjobs, backend=None):
        """ Resubmit (previously submitted) job. Configuration phase is skipped.
        Default implementation works as an emulated-bulk operation.
        If you override this method for bulk optimization then make sure that you call updateMasterJobStatus() on the master job,
        so the master job will be monitored by the monitoring loop.
        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception
        incomplete = 0

        def handleError(x):
            if incomplete:
                raise x
            else:
                return 0
        try:
            for sj in rjobs:
                fqid = sj.getFQID('.')
                logger.info(
                    "resubmitting job %s to %s backend", fqid, sj.backend._name)
                try:
                    b = sj.backend
                    sj.updateStatus('submitting')
                    if backend is None:
                        result = b.resubmit()
                    else:
                        result = b.resubmit(backend=backend)
                    if result:
                        sj.updateStatus('submitted')
                        # sj._commit() # PENDING: TEMPORARY DISABLED
                        incomplete = 1
                    else:
                        return handleError(IncompleteJobSubmissionError(fqid, 'resubmission failed'))
                except Exception as x:
                    log_user_exception(
                        logger, debug=isinstance(x, GangaException))
                    return handleError(IncompleteJobSubmissionError(fqid, str(x)))
        finally:
            master = self.getJobObject().master
            if master:
                master.updateMasterJobStatus()
        return 1
Example #12
    def _parallel_submit(self, b, sj, sc, master_input_sandbox, fqid, logger):

        try:
            sj.updateStatus('submitting')
            if b.submit(sc, master_input_sandbox):
                sj.updateStatus('submitted')
                sj.info.increment()
            else:
                raise IncompleteJobSubmissionError(fqid, 'submission failed')
        except Exception as err:
            #from Ganga.Utility.logging import log_user_exception
            sj.updateStatus('failed')

            from Ganga.Core.exceptions import GangaException
            if isinstance(err, GangaException):
                logger.error(str(err))
                log_user_exception(logger, debug=True)
            else:
                log_user_exception(logger, debug=False)
        finally:
            pass
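
Per-subjob workers like _parallel_submit are scheduled on Ganga's internal thread pool in the master_submit examples below and then waited on with a polling loop. A hedged alternative using only the standard library (submit_all is hypothetical; submit_one stands in for b.submit(sc, master_input_sandbox)):

from concurrent.futures import ThreadPoolExecutor, as_completed

def submit_all(subjobs, submit_one, max_workers=4):
    """Run submit_one for every subjob in parallel; return the subjobs that failed."""
    failed = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(submit_one, sj): sj for sj in subjobs}
        for future in as_completed(futures):
            # an exception or a falsy result both count as a failed submission
            if future.exception() is not None or not future.result():
                failed.append(futures[future])
    return failed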
Example #13
    def master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going=False, parallel_submit=False):
        """  Submit   the  master  job  and  all   its  subjobs.   The
        masterjobconfig  is  shared,  individual  subjob  configs  are
        defined  in  subjobconfigs.   Submission  of  individual  jobs
        (not-split) also  always goes via  this method.  In  that case
        the subjobconfigs contains just one element - the job itself.

        The default  implementation of  this method emulates  the bulk
        submission  calling  a submit()  method  on individual  subjob
        objects.  If submission  of any of the subjobs  fails then the
        whole  process  is  aborted  with  IncompleteJobSubmissionError
        exception. The subjobs which  have already been submitted stay
        submitted.

        The default implementation does not process the masterjobconfig.
        Therefore this method may be overridden in the derived class
        in the following way:

        def master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going):
           ...
           do_some_processing_of(masterjobconfig)
           ...
           return IBackend.master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going)


        Implementation note: we set keep_going to be optional in the
        signature of IBackend.master_submit() to allow the existing
        backend implementations which do not support keep_going=True
        and which at some point may call IBackend.master_submit() to
        work without change. It may sometimes be non-trivial to enable
        support for keep_going=True in some backends, even if they
        finally call IBackend.master_submit(). Therefore it is left to
        the decision of the backend developer to explicitly enable
        support for the keep_going flag.

        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception

        job = self.getJobObject()
        logger.debug("SubJobConfigs: %s" % len(subjobconfigs))
        logger.debug("rjobs: %s" % len(rjobs))
        assert(implies(rjobs, len(subjobconfigs) == len(rjobs)))

        incomplete = 0
        incomplete_subjobs = []

        def handleError(x):
            if keep_going:
                incomplete_subjobs.append(fqid)
                return False
            else:
                if incomplete:
                    raise x
                else:
                    return True

        master_input_sandbox = self.master_prepare(masterjobconfig)

        if parallel_submit:

            from Ganga.GPI import queues

            threads_before = queues.totalNumIntThreads()

            for sc, sj in zip(subjobconfigs, rjobs):

                fqid = sj.getFQID('.')
                b = sj.backend
                # FIXME would be nice to move this to the internal threads not user ones
                #from Ganga.GPIDev.Base.Proxy import stripProxy
                #all_queues = stripProxy(queues)
                #all_queues._addSystem( self._parallel_submit, ( b, sj, sc, master_input_sandbox, fqid, logger ) )
                queues._monitoring_threadpool.add_function(self._parallel_submit, (b, sj, sc, master_input_sandbox, fqid, logger))

            def subjob_status_check(rjobs):
                has_submitted = True
                for sj in rjobs:
                    if sj.status not in ["submitted","failed","completed","running","completing"]:
                        has_submitted = False
                        break
                return has_submitted

            while not subjob_status_check(rjobs):
                import time
                time.sleep(1.)

            for i in rjobs:
                if i.status in ["new", "failed"]:
                    return 0
            return 1

        for sc, sj in zip(subjobconfigs, rjobs):

            fqid = sj.getFQID('.')
            logger.info("submitting job %s to %s backend", fqid, sj.backend._name)
            try:
                b = sj.backend
                sj.updateStatus('submitting')
                if b.submit(sc, master_input_sandbox):
                    sj.updateStatus('submitted')
                    # sj._commit() # PENDING: TEMPORARY DISABLED
                    incomplete = 1
                    sj.info.increment()
                else:
                    if handleError(IncompleteJobSubmissionError(fqid, 'submission failed')):
                        return 0
            except Exception as x:
                #sj.updateStatus('new')
                if isinstance(x, GangaException):
                    logger.error(str(x))
                    log_user_exception(logger, debug=True)
                else:
                    log_user_exception(logger, debug=False)
                if handleError(IncompleteJobSubmissionError(fqid, str(x))):
                    return 0

        if incomplete_subjobs:
            raise IncompleteJobSubmissionError(
                incomplete_subjobs, 'submission failed')

        return 1
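
handleError above encodes the keep_going contract. A compact restatement of the decision it makes (a hypothetical free-function form with explicit parameters; same logic as the closure in the example):

def handle_error(exc, fqid, keep_going, incomplete, incomplete_subjobs):
    # keep_going=True: record the failed subjob and continue the loop
    if keep_going:
        incomplete_subjobs.append(fqid)
        return False
    # keep_going=False and some subjobs already submitted: raise,
    # since the partial submission must be surfaced to the user
    if incomplete:
        raise exc
    # keep_going=False and nothing submitted yet: caller returns 0
    return True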
Example #14
    def master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going=False, parallel_submit=False):
        """  Submit   the  master  job  and  all   its  subjobs.   The
        masterjobconfig  is  shared,  individual  subjob  configs  are
        defined  in  subjobconfigs.   Submission  of  individual  jobs
        (not-split) also  always goes via  this method.  In  that case
        the subjobconfigs contains just one element - the job itself.

        The default  implementation of  this method emulates  the bulk
        submission  calling  a submit()  method  on individual  subjob
        objects.  If submission  of any of the subjobs  fails then the
        whole  process  is  aborted  with  IncompleteJobSubmissionError
        exception. The subjobs which  have already been submitted stay
        submitted.

        The default implementation does not process the masterjobconfig.
        Therefore this method may be overridden in the derived class
        in the following way:

        def master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going):
           ...
           do_some_processing_of(masterjobconfig)
           ...
           return IBackend.master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going)


        Implementation note: we set keep_going to be optional in the
        signature of IBackend.master_submit() to allow the existing
        backend implementations which do not support keep_going=True
        and which at some point may call IBackend.master_submit() to
        work without change. It may sometimes be non-trivial to enable
        support for keep_going=True in some backends, even if they
        finally call IBackend.master_submit(). Therefore it is left to
        the decision of the backend developer to explicitly enable
        support for the keep_going flag.

        """
        from Ganga.Utility.logging import log_user_exception

        logger.debug("SubJobConfigs: %s" % len(subjobconfigs))
        logger.debug("rjobs: %s" % len(rjobs))
        assert(implies(rjobs, len(subjobconfigs) == len(rjobs)))

        incomplete = 0
        incomplete_subjobs = []

        def handleError(x):
            if keep_going:
                incomplete_subjobs.append(fqid)
                return False
            else:
                if incomplete:
                    raise x
                else:
                    return True

        master_input_sandbox = self.master_prepare(masterjobconfig)
        # Shall we submit in parallel
        if parallel_submit:

            from Ganga.Core.GangaThread.WorkerThreads import getQueues

            threads_before = getQueues().totalNumIntThreads()

            for sc, sj in zip(subjobconfigs, rjobs):

                b = sj.backend

                # Must check for credentials here as we cannot handle missing credentials on Queues by design!
                if hasattr(b, 'credential_requirements') and b.credential_requirements is not None:
                    from Ganga.GPIDev.Credentials.CredentialStore import credential_store
                    try:
                        cred = credential_store[b.credential_requirements]
                    except GangaKeyError:
                        credential_store.create(b.credential_requirements)

                fqid = sj.getFQID('.')
                # FIXME would be nice to move this to the internal threads not user ones
                getQueues()._monitoring_threadpool.add_function(self._parallel_submit, (b, sj, sc, master_input_sandbox, fqid, logger), callback_func = self._successfulSubmit, callback_args = (sj, incomplete_subjobs))

            def subjob_status_check(rjobs):
                has_submitted = True
                for sj in rjobs:
                    if sj.status not in ["submitted","failed","completed","running","completing"] and sj.getFQID('.') not in incomplete_subjobs:
                        has_submitted = False
                        break
                return has_submitted

            while not subjob_status_check(rjobs):
                import time
                time.sleep(1.)

            if incomplete_subjobs:
                raise IncompleteJobSubmissionError(
                    incomplete_subjobs, 'submission failed for subjobs %s' % incomplete_subjobs)
            return 1

        # Alternatively submit sequentially
        for sc, sj in zip(subjobconfigs, rjobs):

            fqid = sj.getFQID('.')
            logger.info("submitting job %s to %s backend", fqid, getName(sj.backend))
            try:
                b = stripProxy(sj.backend)
                sj.updateStatus('submitting')
                if b.submit(sc, master_input_sandbox):
                    sj.updateStatus('submitted')
                    # sj._commit() # PENDING: TEMPORARY DISABLED
                    incomplete = 1
                    stripProxy(sj.info).increment()
                else:
                    if handleError(IncompleteJobSubmissionError(fqid, 'submission failed')):
                        raise IncompleteJobSubmissionError(fqid, 'submission failed')
            except Exception as x:
                sj.updateStatus('new')
                if isType(x, GangaException):
                    logger.error("%s" % x)
                    log_user_exception(logger, debug=True)
                else:
                    log_user_exception(logger, debug=False)
                raise IncompleteJobSubmissionError(fqid, 'submission failed')

        return 1
Example #15
    def master_submit(self,
                      rjobs,
                      subjobconfigs,
                      masterjobconfig,
                      keep_going=False,
                      parallel_submit=False):
        """  Submit   the  master  job  and  all   its  subjobs.   The
        masterjobconfig  is  shared,  individual  subjob  configs  are
        defined  in  subjobconfigs.   Submission  of  individual  jobs
        (not-split) also  always goes via  this method.  In  that case
        the subjobconfigs contains just one element - the job itself.

        The default  implementation of  this method emulates  the bulk
        submission  calling  a submit()  method  on individual  subjob
        objects.  If submission  of any of the subjobs  fails then the
        whole  process  is  aborted  with  IncompleteJobSubmissionError
        exception. The subjobs which  have already been submitted stay
        submitted.

        The default implementation does not process the masterjobconfig.
        Therefore this method may be overridden in the derived class
        in the following way:

        def master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going):
           ...
           do_some_processing_of(masterjobconfig)
           ...
           return IBackend.master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going)


        Implementation note: we set keep_going to be optional in the
        signature of IBackend.master_submit() to allow the existing
        backend implementations which do not support keep_going=True
        and which at some point may call IBackend.master_submit() to
        work without change. It may sometimes be non-trivial to enable
        support for keep_going=True in some backends, even if they
        finally call IBackend.master_submit(). Therefore it is left to
        the decision of the backend developer to explicitly enable
        support for the keep_going flag.

        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception

        logger.debug("SubJobConfigs: %s" % len(subjobconfigs))
        logger.debug("rjobs: %s" % len(rjobs))
        assert (implies(rjobs, len(subjobconfigs) == len(rjobs)))

        incomplete = 0
        incomplete_subjobs = []

        def handleError(x):
            if keep_going:
                incomplete_subjobs.append(fqid)
                return False
            else:
                if incomplete:
                    raise x
                else:
                    return True

        master_input_sandbox = self.master_prepare(masterjobconfig)

        if parallel_submit:

            from Ganga.GPI import queues

            threads_before = queues.totalNumIntThreads()

            for sc, sj in zip(subjobconfigs, rjobs):

                fqid = sj.getFQID('.')
                b = sj.backend
                # FIXME would be nice to move this to the internal threads not user ones
                #from Ganga.GPIDev.Base.Proxy import stripProxy
                #all_queues = stripProxy(queues)
                #all_queues._addSystem( self._parallel_submit, ( b, sj, sc, master_input_sandbox, fqid, logger ) )
                queues._monitoring_threadpool.add_function(
                    self._parallel_submit,
                    (b, sj, sc, master_input_sandbox, fqid, logger))

            def subjob_status_check(rjobs):
                has_submitted = True
                for sj in rjobs:
                    if sj.status not in [
                            "submitted", "failed", "completed", "running",
                            "completing"
                    ]:
                        has_submitted = False
                        break
                return has_submitted

            while not subjob_status_check(rjobs):
                import time
                time.sleep(1.)

            for i in rjobs:
                if i.status in ["new", "failed"]:
                    return 0
            return 1

        for sc, sj in zip(subjobconfigs, rjobs):

            fqid = sj.getFQID('.')
            logger.info("submitting job %s to %s backend", fqid,
                        getName(sj.backend))
            try:
                b = stripProxy(sj.backend)
                sj.updateStatus('submitting')
                if b.submit(sc, master_input_sandbox):
                    sj.updateStatus('submitted')
                    # sj._commit() # PENDING: TEMPORARY DISABLED
                    incomplete = 1
                    stripProxy(sj.info).increment()
                else:
                    if handleError(
                            IncompleteJobSubmissionError(
                                fqid, 'submission failed')):
                        return 0
            except Exception as x:
                #sj.updateStatus('new')
                if isType(x, GangaException):
                    logger.error(str(x))
                    log_user_exception(logger, debug=True)
                else:
                    log_user_exception(logger, debug=False)
                if handleError(IncompleteJobSubmissionError(fqid, str(x))):
                    return 0

        if incomplete_subjobs:
            raise IncompleteJobSubmissionError(incomplete_subjobs,
                                               'submission failed')

        return 1
Example #16
    def master_submit(self,rjobs,subjobconfigs,masterjobconfig,keep_going=False):

        """  Submit   the  master  job  and  all   its  subjobs.   The
        masterjobconfig  is  shared,  individual  subjob  configs  are
        defined  in  subjobconfigs.   Submission  of  individual  jobs
        (not-split) also  always goes via  this method.  In  that case
        the subjobconfigs contains just one element - the job itself.

        The default  implementation of  this method emulates  the bulk
        submission  calling  a submit()  method  on individual  subjob
        objects.  If submission  of any of the subjobs  fails then the
        whole  process  is  aborted  with  IncompleteJobSubmissionError
        exception. The subjobs which  have already been submitted stay
        submitted.

        The default implementation does not process the masterjobconfig.
        Therefore this method may be overridden in the derived class
        in the following way:

        def master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going):
           ...
           do_some_processing_of(masterjobconfig)
           ...
           return IBackend.master_submit(self, rjobs, subjobconfigs, masterjobconfig, keep_going)
        

        Implementation note: we set keep_going to be optional in the
        signature of IBackend.master_submit() to allow the existing
        backend implementations which do not support keep_going=True
        and which at some point may call IBackend.master_submit() to
        work without change. It may sometimes be non-trivial to enable
        support for keep_going=True in some backends, even if they
        finally call IBackend.master_submit(). Therefore it is left to
        the decision of the backend developer to explicitly enable
        support for the keep_going flag.

        """
        from Ganga.Core import IncompleteJobSubmissionError, GangaException
        from Ganga.Utility.logging import log_user_exception
        
        job = self.getJobObject()
        assert(implies(rjobs,len(subjobconfigs)==len(rjobs)))

        incomplete = 0
        incomplete_subjobs = []

        def handleError(x):
            if keep_going:
                incomplete_subjobs.append(fqid)
                return False
            else:
                if incomplete:
                    raise x
                else:
                    return True

        master_input_sandbox=self.master_prepare(masterjobconfig)

        for sc,sj in zip(subjobconfigs,rjobs):
            fqid = sj.getFQID('.')
            logger.info("submitting job %s to %s backend",fqid,sj.backend._name)
            try:
                b = sj.backend
                sj.updateStatus('submitting')
                if b.submit(sc,master_input_sandbox):
                    sj.updateStatus('submitted')
                    #sj._commit() # PENDING: TEMPORARY DISABLED
                    incomplete = 1
                else:
                    if handleError(IncompleteJobSubmissionError(fqid,'submission failed')):
                        return 0
            except Exception as x:
                sj.updateStatus('new')
                if isinstance(x,GangaException):
                    logger.error(str(x))
                    log_user_exception(logger,debug = True)
                else:
                    log_user_exception(logger,debug = False)
                if handleError(IncompleteJobSubmissionError(fqid,str(x))):
                    return 0
Example #17
        for c in config:
            logger.error('%s = %s',c,config[c])
        s = 'Cannot connect to the repository: '+str(x)
        logger.error(s)        
        return s

    reps = []
    try:
        for n in names:
            reps.append(factory(dir = os.path.join(getLocalRoot(),version,n)))
    except RepositoryError as x:
        s = print_error(x)
        raise
    except Exception as x:
        s = print_error(x)
        log_user_exception(logger)
        raise
        
    from Ganga.GPIDev.Lib.JobRegistry import JobRegistryInstance, JobRegistryInterface, allJobRegistries

    regs = [JobRegistryInstance(*x) for x in zip(names, reps)]

    for n,r in zip(names,regs):
        allJobRegistries['native_'+n] = r
        if n == 'jobs' and config['DEBUG_startup_profile']:
            PROFN = 'xml.startup.profile.txt'
            print('profiling ON, saving status to', PROFN)
            import profile
            profile.runctx('r._scan_repository()',globals(),{'r':r},PROFN)
        else:
            try: