Example #1
def test_result_ok():
    from GangaDirac.Lib.Backends.DiracUtils import result_ok

    assert not result_ok(None), "Didn't return False with None arg"
    assert not result_ok(''), "Didn't return False with non-dict arg"
    assert not result_ok({}), "Didn't return False as default dict extraction"
    assert not result_ok({'OK': False}), "OK not handled properly"
    assert result_ok({'OK': True}), "Didn't return True"
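
A minimal sketch of an implementation consistent with the assertions above; the real helper lives in GangaDirac.Lib.Backends.DiracUtils and may differ in detail:

def result_ok(result):
    """Return True only for a dict whose 'OK' entry is truthy."""
    if not isinstance(result, dict):  # None, '' and other non-dicts fail
        return False
    return bool(result.get('OK', False))  # a missing 'OK' key defaults to False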
Example #2
    def test_result_ok(self):

        self.assertFalse(result_ok(None), "Didn't return False with None arg")
        self.assertFalse(
            result_ok(''),  "Didn't return False with non-dict arg")
        self.assertFalse(
            result_ok({}),  "Didn't return False as default dict extraction")
        self.assertFalse(result_ok({'OK': False}), "OK not handled properly")
        self.assertTrue(result_ok({'OK': True}), "Didn't return True")
Example #3
def DiracRunSplitter(inputs, filesPerJob, maxFiles, ignoremissing):
    """
    Generator that yields datasets for DIRAC split jobs by run
    """

    metadata = inputs.bkMetadata()
    if not result_ok(metadata):
        logger.error('Error getting input metadata: %s' % str(metadata))
        raise SplittingError('Error splitting files.')
    if metadata['Value']['Failed']:
        logger.error('Error getting part of metadata')
        raise SplittingError('Error splitting files.')

    runs = defaultdict(list)
    for lfn, v in metadata['Value']['Successful'].items():
        f = [f for f in inputs.files if f.lfn == lfn][0]
        runs[v['RunNumber']].append(f)
    logger.info('Found %d runs in inputdata' % len(runs))

    for run, files in sorted(runs.items()):
        run_inputs = inputs.__class__()
        run_inputs.files = files
        if len(files) > filesPerJob:
            datasets = list(DiracSplitter(run_inputs, filesPerJob, None, ignoremissing))
        else:
            datasets = [files]
        logger.info('Run %d with %d files was split in %d subjobs' % (run, len(files), len(datasets)))
        for ds in datasets:
            yield ds
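
A hypothetical call site for this generator; 'inputs' stands in for a dataset object providing .files and .bkMetadata(), as the code above assumes:

# Hypothetical usage: collect one dataset of input files per subjob.
subjob_datasets = list(DiracRunSplitter(inputs, filesPerJob=10,
                                        maxFiles=None, ignoremissing=False))
for n, dataset in enumerate(subjob_datasets):
    print('subjob %d: %d files' % (n, len(dataset)))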
Example #4
def DiracSizeSplitter(inputs, filesPerJob, maxSize, ignoremissing):
    """
    Generator that yields datasets for LHCbDirac split jobs by size
    """
    #logger.debug( "DiracSplitter" )
    #logger.debug( "inputs: %s" % str( inputs ) )
    split_files = []
    i = inputs.__class__()

    if len(inputs.getLFNs()) != len(inputs.files):
        raise SplittingError(
            "Error trying to split dataset using DIRAC backend with non-DiracFile in the inputdata")

    # 'maxSize' is assumed here to cap how many input files are considered
    # (cf. the 'maxFiles' parameter of DiracSplitter).
    all_files = igroup(inputs.files[:maxSize], getConfig('DIRAC')['splitFilesChunks'],
                       leftovers=True)

    #logger.debug( "Looping over all_files" )
    #logger.debug( "%s" % str( all_files ) )

    for files in all_files:

        i.files = files

        LFNsToSplit = i.getLFNs()

        if len(LFNsToSplit) > 1:

            result = execute('splitInputDataBySize(%s,%d)'
                             % (i.getLFNs(), filesPerJob))

            if not result_ok(result):
                logger.error('DIRAC:: Error splitting files: %s' % str(result))
                raise SplittingError('Error splitting files.')

            split_files += result.get('Value', [])

        else:

            split_files.append(LFNsToSplit)

    if len(split_files) == 0:
        raise SplittingError('An unknown error occurred.')

    # FIXME
    # check that all files were available on the grid
    big_list = []
    for l in split_files:
        big_list.extend(l)
    diff = set(inputs.getFileNames()[:maxSize]).difference(big_list)
    if len(diff) > 0:
        for f in diff:
            logger.warning('Ignored file: %s' % f)
        if not ignoremissing:
            raise SplittingError('Some files not found!')
    ###

    logger.debug("Split Files: %s" % str(split_files))

    for dataset in split_files:
        yield dataset
Example #5
 def checkTier1s(self):
     cmd = 'checkTier1s()'
     result = execute(cmd, cred_req=self.credential_requirements)
     if not result_ok(result):
         logger.warning('Could not obtain Tier-1 info: %s' % str(result))
         return
     return result.get('Value', {})
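
A hypothetical call site; the method returns None on failure and a (possibly empty) dict on success, so callers should guard against None:

# Hypothetical usage, assuming 'backend' is an object exposing checkTier1s().
tier1_info = backend.checkTier1s()
if tier1_info is not None:
    for site, info in tier1_info.items():
        print(site, info)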
Example #6
 def checkTier1s(self):
     cmd = 'checkTier1s()'
     result = execute(cmd)
     if not result_ok(result):
         logger.warning('Could not obtain Tier-1 info: %s' % str(result))
         return
     return result.get('Value', {})
Example #7
 def peek(self, filename=None, command=None):
     """Peek at the output of a job (Note: filename/command are ignored)."""
     dirac_cmd = 'peek(%d)' % self.id
     result = execute(dirac_cmd)
     if result_ok(result):
         logger.info(result['Value'])
     else:
         logger.error("No peeking available for Dirac job '%i'.", self.id)
Example #8
 def debug(self):
     '''Obtains some (possibly) useful DIRAC debug info. '''
     # check services
     cmd = 'getServicePorts()'
     result = execute(cmd)
     if type(result) == str:
         try:
             result = eval(result)
         except Exception as err:
             logger.debug("Exception, err: %s" % str(err))
     if not result_ok(result):
         logger.warning('Could not obtain services: %s' % str(result))
         return
     services = result.get('Value', {})
     for category in services:
         system, service = category.split('/')
         cmd = "ping('%s','%s')" % (system, service)
         result = execute(cmd)
         if type(result) == str:
             try:
                 result = eval(result)
             except Exception as err:
                 logger.debug("Exception: %s" % str(err))
         msg = 'OK.'
         if not result_ok(result):
             msg = '%s' % result['Message']
         logger.info('%s: %s' % (category, msg))
     # get pilot info for this job
     if type(self.id) != int:
         return
     j = self.getJobObject()
     cwd = os.getcwd()
     debug_dir = j.getDebugWorkspace().getPath()
     cmd = "getJobPilotOutput(%d,'%s')" % \
           (self.id, debug_dir)
     result = execute(cmd)
     if result_ok(result):
         logger.info('Pilot Info: %s/pilot_%d/std.out.' %
                     (debug_dir, self.id))
     else:
         logger.error(result.get('Message', ''))
Example #9
 def kill(self):
     """ Kill a Dirac jobs"""
     if not self.id:
         return None
     dirac_cmd = 'kill(%d)' % self.id
     result = execute(dirac_cmd)
     if not result_ok(result):
         raise BackendError('Dirac', 'Could not kill job: %s' % str(result))
     return result['OK']
Example #10
 def peek(self, filename=None, command=None):
     """Peek at the output of a job (Note: filename/command are ignored).
     Args:
         filename (str): Ignored but is filename of a file in the sandbox
         command (str): Ignored but is a command which could be executed"""
     dirac_cmd = 'peek(%d)' % self.id
     result = execute(dirac_cmd)
     if result_ok(result):
         logger.info(result['Value'])
     else:
         logger.error("No peeking available for Dirac job '%i'.", self.id)
Example #11
    def getOutputSandbox(self, outputDir=None):
        j = self.getJobObject()
        if outputDir is None:
            outputDir = j.getOutputWorkspace().getPath()
        dirac_cmd = "getOutputSandbox(%d,'%s')"  % (self.id, outputDir)
        result = execute(dirac_cmd)
        if not result_ok(result):
            msg = 'Problem retrieving output: %s' % str(result)
            logger.warning(msg)
            return False

        return True
Example #12
    def getOutputSandbox(self, outputDir=None):
        """Get the outputsandbox for the job object controlling this backend
        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
        """
        j = self.getJobObject()
        if outputDir is None:
            outputDir = j.getOutputWorkspace().getPath()
        dirac_cmd = "getOutputSandbox(%d,'%s')"  % (self.id, outputDir)
        result = execute(dirac_cmd)
        if not result_ok(result):
            msg = 'Problem retrieving output: %s' % str(result)
            logger.warning(msg)
            return False

        return True
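
A hypothetical call site; the method logs a warning and returns False on failure rather than raising, so the return value must be checked:

# Hypothetical usage, assuming 'job' is the Ganga job owning this backend.
if not job.backend.getOutputSandbox():
    logger.warning('Output sandbox retrieval failed for job %s' % job.fqid)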
Example #13
    def _common_submit(self, dirac_script):
        '''Submit the job via the Dirac server.
        Args:
            dirac_script (str): filename of the JDL which is to be submitted to DIRAC
        '''
        j = self.getJobObject()
        self.id = None
        self.actualCE = None
        self.status = None
        self.extraInfo = None
        self.statusInfo = ''
        j.been_queued = False
        dirac_cmd = """execfile(\'%s\')""" % dirac_script
        result = execute(dirac_cmd)
        # Could use the below code instead to submit on a thread
        # If submitting many then user may terminate ganga before
        # all jobs submitted
#        def submit_checker(result, job, script):
#            err_msg = 'Error submitting job to Dirac: %s' % str(result)
#            if not result_ok(result) or 'Value' not in result:
#                logger.error(err_msg)
#                raise BackendError('Dirac',err_msg)
#
#            idlist = result['Value']
#            if type(idlist) is list:
#                return job._setup_bulk_subjobs(idlist, script)
#            job.id = idlist
#        server.execute_nonblocking(dirac_cmd, callback_func=submit_checker, args=(self, dirac_script))
#        return True

        err_msg = 'Error submitting job to Dirac: %s' % str(result)
        if not result_ok(result) or 'Value' not in result:
            logger.error(err_msg)
            logger.error("\n\n===\n%s\n===\n" % dirac_script)
            logger.error("\n\n====\n")
            with open(dirac_script, 'r') as file_in:
                logger.error("%s" % file_in.read())
            logger.error("\n====\n")
            raise BackendError('Dirac', err_msg)

        idlist = result['Value']
        if type(idlist) is list:
            return self._setup_bulk_subjobs(idlist, dirac_script)

        self.id = idlist
        return type(self.id) == int
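
Illustrative (assumed) shapes of the DIRAC result dict this method handles; the real server response may carry additional keys:

# One job submitted: 'Value' is a single id, so self.id is set directly.
single = {'OK': True, 'Value': 1234}
# Bulk submission: 'Value' is a list of ids, handled by _setup_bulk_subjobs.
bulk = {'OK': True, 'Value': [1234, 1235, 1236]}
# Failure: result_ok() is False and a BackendError is raised.
failed = {'OK': False, 'Message': 'Proxy not found'}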
Example #14
    def _internal_job_finalisation(job, updated_dirac_status):

        logger = getLogger()

        if updated_dirac_status == 'completed':
            # firstly update job to completing
            DiracBase._getStateTime(job, 'completing')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us

            job.updateStatus('completing')
            if job.master:
                job.master.updateMasterJobStatus()

            import time
            start = time.time()
            # contact dirac for information
            job.backend.normCPUTime = execute('normCPUTime(%d)' % job.backend.id)
            getSandboxResult = execute("getOutputSandbox(%d,'%s')" % (job.backend.id, job.getOutputWorkspace().getPath()))
            file_info_dict = execute('getOutputDataInfo(%d)' % job.backend.id)
            now = time.time()
            logger.debug('Job ' + job.fqid + ' Time for Dirac metadata : ' + str(now - start))

            logger.debug('Job ' + job.fqid + ' OutputDataInfo: ' + str(file_info_dict))
            logger.debug('Job ' + job.fqid + ' OutputSandbox: ' + str(getSandboxResult))

            # Set DiracFile metadata
            wildcards = [f.namePattern for f in job.outputfiles.get(DiracFile) if regex.search(f.namePattern) is not None]

            with open(os.path.join(job.getOutputWorkspace().getPath(), getConfig('Output')['PostProcessLocationsFileName']), 'ab') as postprocesslocationsfile:
                if not hasattr(file_info_dict, 'keys'):
                    logger.error("Error understanding OutputDataInfo: %s" % str(file_info_dict))
                    from Ganga.Core.exceptions import GangaException
                    raise GangaException("Error understanding OutputDataInfo: %s" % str(file_info_dict))

                for file_name in file_info_dict.get('Value', []):
                    file_name = os.path.basename(file_name)
                    info = file_info_dict.get(file_name)
                    logger.debug("file_name: %s,\tinfo: %s" % (str(file_name), str(info)))

                    valid_wildcards = [wc for wc in wildcards if fnmatch.fnmatch(file_name, wc)]
                    if len(valid_wildcards) == 0:
                        valid_wildcards.append('')

                    if not hasattr(info, 'get'):
                        logger.error("Error getting OutputDataInfo for: %s" % str(job.getFQID('.')))
                        logger.error("Please check the Dirac Job still exists or attempt a job.backend.reset() to try again!")
                        logger.error("Err: %s" % str(info))
                        logger.error("file_info_dict: %s" % str(file_info_dict))
                        from Ganga.Core.exceptions import GangaException
                        raise GangaException("Error getting OutputDataInfo")

                    for wc in valid_wildcards:
                        logger.debug("wildcard: %s" % str(wc))

                        DiracFileData = 'DiracFile:::%s&&%s->%s:::%s:::%s\n' % (wc,
                                                                                file_name,
                                                                                info.get('LFN', 'Error Getting LFN!'),
                                                                                str(info.get('LOCATIONS', ['NotAvailable'])),
                                                                                info.get('GUID', 'NotAvailable')
                                                                                )
                        logger.debug("DiracFileData: %s" % str(DiracFileData))
                        postprocesslocationsfile.write(DiracFileData)

            # check outputsandbox downloaded correctly
            if not result_ok(getSandboxResult):
                logger.warning('Problem retrieving outputsandbox: %s' % str(getSandboxResult))
                DiracBase._getStateTime(job, 'failed')
                if job.status in ['removed', 'killed']:
                    return
                if (job.master and job.master.status in ['removed', 'killed']):
                    return  # user changed it under us
                job.updateStatus('failed')
                if job.master:
                    job.master.updateMasterJobStatus()
                raise BackendError('Dirac', 'Problem retrieving outputsandbox: %s' % str(getSandboxResult))

            # finally update job to completed
            DiracBase._getStateTime(job, 'completed')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('completed')
            if job.master:
                job.master.updateMasterJobStatus()
            now = time.time()
            logger.debug('Job ' + job.fqid + ' Time for complete update : ' + str(now - start))

        elif updated_dirac_status == 'failed':
            # firstly update status to failed
            DiracBase._getStateTime(job, 'failed')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('failed')
            if job.master:
                job.master.updateMasterJobStatus()

            # if requested try downloading outputsandbox anyway
            if getConfig('DIRAC')['failed_sandbox_download']:
                execute("getOutputSandbox(%d,'%s')" %
                        (job.backend.id, job.getOutputWorkspace().getPath()))
        else:
            logger.error("Unexpected dirac status '%s' encountered" % updated_dirac_status)
Example #15
def DiracSplitter(inputs, filesPerJob, maxFiles, ignoremissing):
    """
    Generator that yields datasets for DIRAC split jobs
    """
    #logger.debug( "DiracSplitter" )
    #logger.debug( "inputs: %s" % str( inputs ) )
    split_files = []
    i = inputs.__class__()

    if len(inputs.getLFNs()) != len(inputs.files):
        raise SplittingError(
            "Error trying to split dataset using DIRAC backend with non-DiracFile in the inputdata"
        )

    all_files = igroup(inputs.files[:maxFiles],
                       getConfig('DIRAC')['splitFilesChunks'],
                       leftovers=True)

    #logger.debug( "Looping over all_files" )
    #logger.debug( "%s" % str( all_files ) )

    for files in all_files:

        i.files = files

        LFNsToSplit = i.getLFNs()

        if len(LFNsToSplit) > 1:

            result = execute('splitInputData(%s, %d)' %
                             (i.getLFNs(), filesPerJob))

            if not result_ok(result):
                logger.error('DIRAC:: Error splitting files: %s' % str(result))
                raise SplittingError('Error splitting files.')

            split_files += result.get('Value', [])

        else:

            split_files.append(LFNsToSplit)

    if len(split_files) == 0:
        raise SplittingError('An unknown error occurred.')

    # FIXME
    # check that all files were available on the grid
    big_list = []
    for l in split_files:
        big_list.extend(l)
    diff = set(inputs.getFileNames()[:maxFiles]).difference(big_list)
    if len(diff) > 0:
        for f in diff:
            logger.warning('Ignored file: %s' % f)
        if not ignoremissing:
            raise SplittingError('Some files not found!')
    ###

    logger.debug("Split Files: %s" % str(split_files))

    for _dataset in split_files:
        dataset = []
        for _lfn in _dataset:
            dataset.append(DiracFile(lfn=_lfn))
        yield dataset
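
A hypothetical look at what this generator yields: each dataset is a list of DiracFile objects rebuilt from the LFNs that DIRAC grouped together:

# Hypothetical usage: one subjob per yielded dataset.
for n, dataset in enumerate(DiracSplitter(inputs, filesPerJob=10,
                                          maxFiles=None, ignoremissing=False)):
    print('subjob %d: %s' % (n, [f.lfn for f in dataset]))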
Example #16
    def _internal_job_finalisation(job, updated_dirac_status):
        """
        This method performs the main job finalisation
        Args:
            job (Job): This is the job we want to finalise
            updated_dirac_status (str): String representing the Ganga finalisation state of the job (failed/completed)
        """

        if updated_dirac_status == 'completed':
            start = time.time()
            # firstly update job to completing
            DiracBase._getStateTime(job, 'completing')
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us

            job.updateStatus('completing')
            if job.master:
                job.master.updateMasterJobStatus()

            output_path = job.getOutputWorkspace().getPath()

            logger.info('Contacting DIRAC for job: %s' % job.fqid)
            # Contact dirac which knows about the job
            job.backend.normCPUTime, getSandboxResult, file_info_dict, completeTimeResult = execute("finished_job(%d, '%s')" % (job.backend.id, output_path))

            now = time.time()
            logger.info('%0.2fs taken to download output from DIRAC for Job %s' % ((now - start), job.fqid))

            #logger.info('Job ' + job.fqid + ' OutputDataInfo: ' + str(file_info_dict))
            #logger.info('Job ' + job.fqid + ' OutputSandbox: ' + str(getSandboxResult))
            #logger.info('Job ' + job.fqid + ' normCPUTime: ' + str(job.backend.normCPUTime))

            # Set DiracFile metadata
            wildcards = [f.namePattern for f in job.outputfiles.get(DiracFile) if regex.search(f.namePattern) is not None]

            lfn_store = os.path.join(output_path, getConfig('Output')['PostProcessLocationsFileName'])

            # Make the file on disk with a nullop...
            if not os.path.isfile(lfn_store):
                with open(lfn_store, 'w'):
                    pass

            if job.outputfiles.get(DiracFile):

                # Now we can iterate over the contents of the file without touching it
                with open(lfn_store, 'ab') as postprocesslocationsfile:
                    if not hasattr(file_info_dict, 'keys'):
                        logger.error("Error understanding OutputDataInfo: %s" % str(file_info_dict))
                        from Ganga.Core.exceptions import GangaException
                        raise GangaException("Error understanding OutputDataInfo: %s" % str(file_info_dict))

                    # Caution: it is not clear at the moment whether this 'Value' is an LHCb-ism or a bug
                    list_of_files = file_info_dict.get('Value', file_info_dict.keys())

                    for file_name in list_of_files:
                        file_name = os.path.basename(file_name)
                        info = file_info_dict.get(file_name)
                        #logger.debug("file_name: %s,\tinfo: %s" % (str(file_name), str(info)))

                        if not hasattr(info, 'get'):
                            logger.error("Error getting OutputDataInfo for: %s" % str(job.getFQID('.')))
                            logger.error("Please check the Dirac Job still exists or attempt a job.backend.reset() to try again!")
                            logger.error("Err: %s" % str(info))
                            logger.error("file_info_dict: %s" % str(file_info_dict))
                            from Ganga.Core.exceptions import GangaException
                            raise GangaException("Error getting OutputDataInfo")

                        valid_wildcards = [wc for wc in wildcards if fnmatch.fnmatch(file_name, wc)]
                        if not valid_wildcards:
                            valid_wildcards.append('')

                        for wc in valid_wildcards:
                            #logger.debug("wildcard: %s" % str(wc))

                            DiracFileData = 'DiracFile:::%s&&%s->%s:::%s:::%s\n' % (wc,
                                                                                    file_name,
                                                                                    info.get('LFN', 'Error Getting LFN!'),
                                                                                    str(info.get('LOCATIONS', ['NotAvailable'])),
                                                                                    info.get('GUID', 'NotAvailable')
                                                                                    )
                            #logger.debug("DiracFileData: %s" % str(DiracFileData))
                            postprocesslocationsfile.write(DiracFileData)
                            postprocesslocationsfile.flush()

                logger.debug("Written: %s" % open(lfn_store, 'r').readlines())

            # check outputsandbox downloaded correctly
            if not result_ok(getSandboxResult):
                logger.warning('Problem retrieving outputsandbox: %s' % str(getSandboxResult))
                DiracBase._getStateTime(job, 'failed')
                if job.status in ['removed', 'killed']:
                    return
                elif (job.master and job.master.status in ['removed', 'killed']):
                    return  # user changed it under us
                job.updateStatus('failed')
                if job.master:
                    job.master.updateMasterJobStatus()
                raise BackendError('Dirac', 'Problem retrieving outputsandbox: %s' % str(getSandboxResult))

            # finally update job to completed
            DiracBase._getStateTime(job, 'completed', completeTimeResult)
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('completed')
            if job.master:
                job.master.updateMasterJobStatus()
            now = time.time()
            logger.debug('Job ' + job.fqid + ' Time for complete update : ' + str(now - start))

        elif updated_dirac_status == 'failed':
            # firstly update status to failed
            DiracBase._getStateTime(job, 'failed')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('failed')
            if job.master:
                job.master.updateMasterJobStatus()

            # if requested try downloading outputsandbox anyway
            if configDirac['failed_sandbox_download']:
                execute("getOutputSandbox(%d,'%s')" % (job.backend.id, job.getOutputWorkspace().getPath()))
        else:
            logger.error("Unexpected dirac status '%s' encountered" % updated_dirac_status)
Example #17
    def _internal_job_finalisation(job, updated_dirac_status):
        """
        This method performs the main job finalisation
        Args:
            job (Job): This is the job we want to finalise
            updated_dirac_status (str): String representing the Ganga finalisation state of the job (failed/completed)
        """

        if updated_dirac_status == 'completed':
            start = time.time()
            # firstly update job to completing
            DiracBase._getStateTime(job, 'completing')
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us

            job.updateStatus('completing')
            if job.master:
                job.master.updateMasterJobStatus()

            output_path = job.getOutputWorkspace().getPath()

            logger.info('Contacting DIRAC for job: %s' % job.fqid)
            # Contact dirac which knows about the job
            job.backend.normCPUTime, getSandboxResult, file_info_dict, completeTimeResult = execute(
                "finished_job(%d, '%s')" % (job.backend.id, output_path),
                cred_req=job.backend.credential_requirements)

            now = time.time()
            logger.info(
                '%0.2fs taken to download output from DIRAC for Job %s' %
                ((now - start), job.fqid))

            #logger.info('Job ' + job.fqid + ' OutputDataInfo: ' + str(file_info_dict))
            #logger.info('Job ' + job.fqid + ' OutputSandbox: ' + str(getSandboxResult))
            #logger.info('Job ' + job.fqid + ' normCPUTime: ' + str(job.backend.normCPUTime))

            # Set DiracFile metadata
            wildcards = [
                f.namePattern for f in job.outputfiles.get(DiracFile)
                if regex.search(f.namePattern) is not None
            ]

            lfn_store = os.path.join(
                output_path,
                getConfig('Output')['PostProcessLocationsFileName'])

            # Make the file on disk with a nullop...
            if not os.path.isfile(lfn_store):
                with open(lfn_store, 'w'):
                    pass

            if job.outputfiles.get(DiracFile):

                # Now we can iterate over the contents of the file without touching it
                with open(lfn_store, 'ab') as postprocesslocationsfile:
                    if not hasattr(file_info_dict, 'keys'):
                        logger.error("Error understanding OutputDataInfo: %s" %
                                     str(file_info_dict))
                        raise GangaDiracError(
                            "Error understanding OutputDataInfo: %s" %
                            str(file_info_dict))

                    # Caution: it is not clear at the moment whether this 'Value' is an LHCb-ism or a bug
                    list_of_files = file_info_dict.get('Value',
                                                       file_info_dict.keys())

                    for file_name in list_of_files:
                        file_name = os.path.basename(file_name)
                        info = file_info_dict.get(file_name)
                        #logger.debug("file_name: %s,\tinfo: %s" % (str(file_name), str(info)))

                        if not hasattr(info, 'get'):
                            logger.error(
                                "Error getting OutputDataInfo for: %s" %
                                str(job.getFQID('.')))
                            logger.error(
                                "Please check the Dirac Job still exists or attempt a job.backend.reset() to try again!"
                            )
                            logger.error("Err: %s" % str(info))
                            logger.error("file_info_dict: %s" %
                                         str(file_info_dict))
                            raise GangaDiracError(
                                "Error getting OutputDataInfo")

                        valid_wildcards = [
                            wc for wc in wildcards
                            if fnmatch.fnmatch(file_name, wc)
                        ]
                        if not valid_wildcards:
                            valid_wildcards.append('')

                        for wc in valid_wildcards:
                            #logger.debug("wildcard: %s" % str(wc))

                            DiracFileData = 'DiracFile:::%s&&%s->%s:::%s:::%s\n' % (
                                wc, file_name,
                                info.get('LFN', 'Error Getting LFN!'),
                                str(info.get('LOCATIONS', ['NotAvailable'])),
                                info.get('GUID', 'NotAvailable'))
                            #logger.debug("DiracFileData: %s" % str(DiracFileData))
                            postprocesslocationsfile.write(DiracFileData)
                            postprocesslocationsfile.flush()

                logger.debug("Written: %s" % open(lfn_store, 'r').readlines())

            # check outputsandbox downloaded correctly
            if not result_ok(getSandboxResult):
                logger.warning('Problem retrieving outputsandbox: %s' %
                               str(getSandboxResult))
                DiracBase._getStateTime(job, 'failed')
                if job.status in ['removed', 'killed']:
                    return
                elif (job.master
                      and job.master.status in ['removed', 'killed']):
                    return  # user changed it under us
                job.updateStatus('failed')
                if job.master:
                    job.master.updateMasterJobStatus()
                raise BackendError(
                    'Dirac', 'Problem retrieving outputsandbox: %s' %
                    str(getSandboxResult))

            # finally update job to completed
            DiracBase._getStateTime(job, 'completed', completeTimeResult)
            if job.status in ['removed', 'killed']:
                return
            elif (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('completed')
            if job.master:
                job.master.updateMasterJobStatus()
            now = time.time()
            logger.debug('Job ' + job.fqid + ' Time for complete update : ' +
                         str(now - start))

        elif updated_dirac_status == 'failed':
            # firstly update status to failed
            DiracBase._getStateTime(job, 'failed')
            if job.status in ['removed', 'killed']:
                return
            if (job.master and job.master.status in ['removed', 'killed']):
                return  # user changed it under us
            job.updateStatus('failed')
            if job.master:
                job.master.updateMasterJobStatus()

            # if requested try downloading outputsandbox anyway
            if configDirac['failed_sandbox_download']:
                execute("getOutputSandbox(%d,'%s')" %
                        (job.backend.id, job.getOutputWorkspace().getPath()),
                        cred_req=job.backend.credential_requirements)
        else:
            logger.error("Job #%s Unexpected dirac status '%s' encountered" %
                         (job.getFQID('.'), updated_dirac_status))