Ejemplo n.º 1
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries ARC (through ``Grid.arc_status``) for every job in ``jobs``
        that has a backend id and was submitted more than
        ``config["ArcWaitTimeBeforeStartingMonitoring"]`` seconds ago, then
        copies any state change onto the job's backend object.  When a job
        reaches a finished state its output sandbox is downloaded and, on
        success, the job is purged from ARC.

        Args:
            jobs: iterable of job objects.  Each one is read through
                ``backend.id``, ``backend.actualCE``, ``backend.status`` and
                ``time.timestamps["submitted"]``.

        Returns:
            None.
        '''

        import datetime

        now = datetime.datetime.utcnow()

        backenddict = {}
        jobdict = {}
        for j in jobs:
            if not j.backend.id:
                continue
            # total_seconds() and not .seconds: .seconds is only the
            # sub-day component of the timedelta, so a job submitted more
            # than 24 hours ago could look as if it had just been submitted.
            waited = (now - j.time.timestamps["submitted"]).total_seconds()
            if waited > config["ArcWaitTimeBeforeStartingMonitoring"]:
                jobdict[j.backend.id] = j
                # keyed by CE so that each CE is passed to ARC only once
                backenddict[j.backend.actualCE] = j

        if not jobdict:
            return

        jobInfoDict = Grid.arc_status(jobdict.keys(), backenddict.keys())
        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for jid, info in jobInfoDict.items():
            job = jobdict[jid]

            if not info:
                logger.warning('fail to retrieve job informaton: %s' %
                               job.getFQID('.'))
                continue

            # the host part of the job id URL is the CE actually running it
            ce_host = urlparse(jid)[1].split(":")[0]
            if job.backend.actualCE != ce_host:
                job.backend.actualCE = ce_host

            # no need to update Ganga job status if backend status is
            # not changed
            state = info['State']
            if job.backend.status == state:
                continue

            # download output sandboxes if final status is reached
            if state in ('Finished', '(FINISHED)', 'Finished (FINISHED)'):
                outdir = job.getOutputWorkspace(create=True).getPath()

                # grab output sandbox
                if Grid.arc_get_output(job.backend.id, outdir):
                    _, app_exitcode = Grid.__get_app_exitcode__(outdir)
                    job.backend.exitcode = app_exitcode

                    jidListForPurge.append(job.backend.id)
                else:
                    logger.error('fail to download job output: %s' %
                                 job.getFQID('.'))

            job.backend.status = state

            if 'Exit Code' in info:
                # a missing or non-numeric exit code is recorded as failure
                try:
                    exitcode_arc = int(info['Exit Code'])
                except (TypeError, ValueError):
                    exitcode_arc = 1
                job.backend.exitcode_arc = exitcode_arc

            if 'Job Error' in info:
                # best effort: a reason that cannot be stored must not stop
                # the status update below
                try:
                    job.backend.reason = info['Job Error']
                except Exception:
                    pass

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally
        if jidListForPurge:
            if not Grid.arc_purgeMultiple(jidListForPurge):
                logger.warning("Failed to purge all ARC jobs.")
Ejemplo n.º 2
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries ARC (through ``Grid.arc_status``) for every job in ``jobs``
        that has a backend id and was submitted more than
        ``config["ArcWaitTimeBeforeStartingMonitoring"]`` seconds ago, then
        copies any state change onto the job's backend object.  Requests are
        batched per credential requirement; a group whose credential is
        missing or invalid is skipped and its requirement is added to
        ``needed_credentials``.  When a job reaches a finished state its
        output sandbox is downloaded and, on success, the job is purged from
        ARC.

        Args:
            jobs: iterable of job objects.  Each one is read through
                ``backend.id``, ``backend.actualCE``, ``backend.status``,
                ``backend.credential_requirements`` and
                ``time.timestamps["submitted"]``.

        Returns:
            None.
        '''

        import datetime

        now = datetime.datetime.utcnow()

        # NOTE(review): a CE shared by several jobs appears here more than
        # once -- confirm that Grid.arc_status tolerates duplicates.
        ce_list = []  # type: List[str]
        jobdict = {}  # type: Mapping[str, Job]
        for j in jobs:
            if not j.backend.id:
                continue
            # total_seconds() and not .seconds: .seconds is only the
            # sub-day component of the timedelta, so a job submitted more
            # than 24 hours ago could look as if it had just been submitted.
            waited = (now - j.time.timestamps["submitted"]).total_seconds()
            if waited > config["ArcWaitTimeBeforeStartingMonitoring"]:
                jobdict[j.backend.id] = j
                ce_list.append(j.backend.actualCE)

        if not jobdict:
            return

        # Group jobs by the backend's credential requirements
        cred_to_backend_id_list = defaultdict(list)  # type: Mapping[ICredentialRequirement, List[str]]
        for jid, job in jobdict.items():
            cred_to_backend_id_list[job.backend.credential_requirements].append(jid)

        # Batch the status requests by credential requirement
        jobInfoDict = {}
        for cred_req, job_ids in cred_to_backend_id_list.items():
            # If the credential is not valid or doesn't exist then skip it
            cred = credential_store.get(cred_req)
            if not cred or not cred.is_valid():
                needed_credentials.add(cred_req)
                continue
            # Request the status of this group's jobs with its credential
            jobInfoDict.update(Grid.arc_status(job_ids, ce_list, cred_req))

        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for jid, info in jobInfoDict.items():
            job = jobdict[jid]

            if not info:
                logger.warning(
                    'fail to retrieve job informaton: %s' % job.getFQID('.'))
                continue

            # the host part of the job id URL is the CE actually running it
            ce_host = urlparse(jid)[1].split(":")[0]
            if job.backend.actualCE != ce_host:
                job.backend.actualCE = ce_host

            # no need to update Ganga job status if backend status is
            # not changed
            state = info['State']
            if job.backend.status == state:
                continue

            # download output sandboxes if final status is reached
            if state in ('Finished', '(FINISHED)', 'Finished (FINISHED)'):
                outdir = job.getOutputWorkspace(create=True).getPath()

                # grab output sandbox
                if Grid.arc_get_output(job.backend.id, outdir, job.backend.credential_requirements):
                    _, app_exitcode = Grid.__get_app_exitcode__(outdir)
                    job.backend.exitcode = app_exitcode

                    jidListForPurge.append(job.backend.id)
                else:
                    logger.error(
                        'fail to download job output: %s' % job.getFQID('.'))

            job.backend.status = state

            if 'Exit Code' in info:
                # a missing or non-numeric exit code is recorded as failure
                try:
                    exitcode_arc = int(info['Exit Code'])
                except (TypeError, ValueError):
                    exitcode_arc = 1
                job.backend.exitcode_arc = exitcode_arc

            if 'Job Error' in info:
                # best effort: a reason that cannot be stored must not stop
                # the status update below
                try:
                    job.backend.reason = info['Job Error']
                except Exception:
                    pass

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally
        if jidListForPurge:
            purge_set = set(jidListForPurge)
            for cred_req, job_ids in cred_to_backend_id_list.items():
                to_purge = purge_set.intersection(job_ids)
                # nothing fetched for this credential: do not issue an empty
                # purge request (and possibly a spurious warning)
                if not to_purge:
                    continue
                if not Grid.arc_purge_multiple(to_purge, cred_req):
                    logger.warning("Failed to purge all ARC jobs.")
Ejemplo n.º 3
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries ARC (through ``Grid.arc_status``) for every job in ``jobs``
        that has a backend id and was submitted more than
        ``config["ArcWaitTimeBeforeStartingMonitoring"]`` seconds ago, then
        copies any state change onto the job's backend object.  Requests are
        batched per credential requirement; a group whose credential is
        missing or invalid is skipped and its requirement is added to
        ``needed_credentials``.  When a job reaches a finished state its
        output sandbox is downloaded and, on success, the job is purged from
        ARC.

        Args:
            jobs: iterable of job objects.  Each one is read through
                ``backend.id``, ``backend.actualCE``, ``backend.status``,
                ``backend.credential_requirements`` and
                ``time.timestamps["submitted"]``.

        Returns:
            None.
        '''

        import datetime

        now = datetime.datetime.utcnow()

        # NOTE(review): a CE shared by several jobs appears here more than
        # once -- confirm that Grid.arc_status tolerates duplicates.
        ce_list = []  # type: List[str]
        jobdict = {}  # type: Mapping[str, Job]
        for j in jobs:
            if not j.backend.id:
                continue
            # total_seconds() and not .seconds: .seconds is only the
            # sub-day component of the timedelta, so a job submitted more
            # than 24 hours ago could look as if it had just been submitted.
            waited = (now - j.time.timestamps["submitted"]).total_seconds()
            if waited > config["ArcWaitTimeBeforeStartingMonitoring"]:
                jobdict[j.backend.id] = j
                ce_list.append(j.backend.actualCE)

        if not jobdict:
            return

        # Group jobs by the backend's credential requirements
        cred_to_backend_id_list = defaultdict(
            list)  # type: Mapping[ICredentialRequirement, List[str]]
        for jid, job in jobdict.items():
            cred_to_backend_id_list[
                job.backend.credential_requirements].append(jid)

        # Batch the status requests by credential requirement
        jobInfoDict = {}
        for cred_req, job_ids in cred_to_backend_id_list.items():
            # If the credential is not valid or doesn't exist then skip it
            cred = credential_store.get(cred_req)
            if not cred or not cred.is_valid():
                needed_credentials.add(cred_req)
                continue
            # Request the status of this group's jobs with its credential
            jobInfoDict.update(Grid.arc_status(job_ids, ce_list, cred_req))

        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for jid, info in jobInfoDict.items():
            job = jobdict[jid]

            if not info:
                logger.warning('fail to retrieve job informaton: %s' %
                               job.getFQID('.'))
                continue

            # the host part of the job id URL is the CE actually running it
            ce_host = urlparse(jid)[1].split(":")[0]
            if job.backend.actualCE != ce_host:
                job.backend.actualCE = ce_host

            # no need to update Ganga job status if backend status is
            # not changed
            state = info['State']
            if job.backend.status == state:
                continue

            # download output sandboxes if final status is reached
            if state in ('Finished', '(FINISHED)', 'Finished (FINISHED)'):
                outdir = job.getOutputWorkspace(create=True).getPath()

                # grab output sandbox
                if Grid.arc_get_output(
                        job.backend.id,
                        outdir,
                        job.backend.credential_requirements):
                    _, app_exitcode = Grid.__get_app_exitcode__(outdir)
                    job.backend.exitcode = app_exitcode

                    jidListForPurge.append(job.backend.id)
                else:
                    logger.error('fail to download job output: %s' %
                                 job.getFQID('.'))

            job.backend.status = state

            if 'Exit Code' in info:
                # a missing or non-numeric exit code is recorded as failure
                try:
                    exitcode_arc = int(info['Exit Code'])
                except (TypeError, ValueError):
                    exitcode_arc = 1
                job.backend.exitcode_arc = exitcode_arc

            if 'Job Error' in info:
                # best effort: a reason that cannot be stored must not stop
                # the status update below
                try:
                    job.backend.reason = info['Job Error']
                except Exception:
                    pass

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally
        if jidListForPurge:
            purge_set = set(jidListForPurge)
            for cred_req, job_ids in cred_to_backend_id_list.items():
                to_purge = purge_set.intersection(job_ids)
                # nothing fetched for this credential: do not issue an empty
                # purge request (and possibly a spurious warning)
                if not to_purge:
                    continue
                if not Grid.arc_purge_multiple(to_purge, cred_req):
                    logger.warning("Failed to purge all ARC jobs.")
Ejemplo n.º 4
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries ARC (through ``Grid.arc_status``) for every job in ``jobs``
        that has a backend id and was submitted more than
        ``config["ArcWaitTimeBeforeStartingMonitoring"]`` seconds ago, then
        copies any state change onto the job's backend object.  When a job
        reaches a finished state its output sandbox is downloaded and, on
        success, the job is purged from ARC.

        Args:
            jobs: iterable of job objects.  Each one is read through
                ``backend.id``, ``backend.actualCE``, ``backend.status`` and
                ``time.timestamps["submitted"]``.

        Returns:
            None.
        '''

        import datetime

        now = datetime.datetime.utcnow()

        backenddict = {}
        jobdict = {}
        for j in jobs:
            if not j.backend.id:
                continue
            # total_seconds() and not .seconds: .seconds is only the
            # sub-day component of the timedelta, so a job submitted more
            # than 24 hours ago could look as if it had just been submitted.
            waited = (now - j.time.timestamps["submitted"]).total_seconds()
            if waited > config["ArcWaitTimeBeforeStartingMonitoring"]:
                jobdict[j.backend.id] = j
                # keyed by CE so that each CE is passed to ARC only once
                backenddict[j.backend.actualCE] = j

        if not jobdict:
            return

        jobInfoDict = Grid.arc_status(
            jobdict.keys(), backenddict.keys())
        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for jid, info in jobInfoDict.items():
            job = jobdict[jid]

            if not info:
                logger.warning(
                    'fail to retrieve job informaton: %s' % job.getFQID('.'))
                continue

            # the host part of the job id URL is the CE actually running it
            ce_host = urlparse(jid)[1].split(":")[0]
            if job.backend.actualCE != ce_host:
                job.backend.actualCE = ce_host

            # no need to update Ganga job status if backend status is
            # not changed
            state = info['State']
            if job.backend.status == state:
                continue

            # download output sandboxes if final status is reached
            if state in ('Finished', '(FINISHED)', 'Finished (FINISHED)'):
                outdir = job.getOutputWorkspace(create=True).getPath()

                # grab output sandbox
                if Grid.arc_get_output(job.backend.id, outdir):
                    _, app_exitcode = Grid.__get_app_exitcode__(outdir)
                    job.backend.exitcode = app_exitcode

                    jidListForPurge.append(job.backend.id)
                else:
                    logger.error(
                        'fail to download job output: %s' % job.getFQID('.'))

            job.backend.status = state

            if 'Exit Code' in info:
                # a missing or non-numeric exit code is recorded as failure
                try:
                    exitcode_arc = int(info['Exit Code'])
                except (TypeError, ValueError):
                    exitcode_arc = 1
                job.backend.exitcode_arc = exitcode_arc

            if 'Job Error' in info:
                # best effort: a reason that cannot be stored must not stop
                # the status update below
                try:
                    job.backend.reason = info['Job Error']
                except Exception:
                    pass

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally
        if jidListForPurge:
            if not Grid.arc_purgeMultiple(jidListForPurge):
                logger.warning("Failed to purge all ARC jobs.")