Example #1
0
    def execute(self, jobID, fileSelection=None,
                conn=None, transaction=False):
        """
        _execute_

        Load the given job(s) together with their input files.

        Runs ``self.sql`` for the jobs and ``self.fileSQL`` for their input
        files, then ``self.parentSQL`` and ``self.getRunLumis`` for the
        distinct files, and finally attaches one ``File`` object per
        (job, file) pair to the job under the ``input_files`` key.

        :param jobID: a single job id, or a list of bind dictionaries that is
            handed to the queries unchanged (presumably ``{"jobid": <id>}``
            entries like the single-id case -- confirm against callers).
            An empty list returns ``[]`` without touching the database.
        :param fileSelection: optional dictionary key'ed by the job id and
            with a list of lfns. When given, only files whose lfn is listed
            for their job are kept; it must contain an entry for every job id
            returned by the file query, otherwise ``KeyError`` is raised.
        :param conn: database connection forwarded to every query.
        :param transaction: transaction flag forwarded to every query.
        :return: list of job dictionaries, each carrying an ``input_files``
            list (empty when no file matched the job).
        """

        if isinstance(jobID, list):
            if not jobID:
                return []
            binds = jobID
        else:
            binds = [{"jobid": jobID}]

        result = self.dbi.processData(self.sql, binds, conn=conn,
                                      transaction=transaction)
        jobList = self.formatDict(result)
        for entry in jobList:
            entry.setdefault('input_files', [])

        filesResult = self.dbi.processData(self.fileSQL, binds, conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)
        if fileSelection:
            fileList = [x for x in fileList if x['lfn'] in fileSelection[x['jobid']]]

        # The same file id can come back in several rows (the loop further
        # down merges repeated rows into one File). Keep the first row per id
        # and build the unique bind list with a dictionary lookup instead of
        # scanning the bind list for every row.
        noDuplicateFiles = {}
        fileBinds = []
        for x in fileList:
            if x['id'] not in noDuplicateFiles:
                noDuplicateFiles[x['id']] = x
                fileBinds.append({'fileid': x['id']})

        # Parent lfns indexed by child file id, so that the per-file loop
        # below does not have to rescan the whole parent result every time.
        parentsByFile = {}
        if fileBinds:
            parentResult = self.dbi.processData(self.parentSQL, fileBinds, conn=conn,
                                                transaction=transaction)
            for entry in self.formatDict(parentResult):
                parentsByFile.setdefault(entry['id'], []).append(entry['lfn'])

            # only upload to not duplicate files to prevent excessive memory.
            # A real list is passed because dict.values() is only a view on
            # Python 3. getRunLumis is defined outside this method; the loop
            # below relies on it having set 'newRuns' on each of these rows.
            self.getRunLumis(fileBinds, list(noDuplicateFiles.values()), conn, transaction)

        filesForJobs = {}
        for f in fileList:
            jobFiles = filesForJobs.setdefault(f['jobid'], {})
            # file might not have a valid location: the column can be
            # missing or Null, both are skipped
            location = f.get('pnn')

            if f['id'] in jobFiles:
                # If the file is there and it has a location, just add it
                if location:
                    jobFiles[f['id']]['locations'].add(location)
                continue

            wmbsFile = File(id=f['id'])
            # need to update with noDuplicateFiles since this one has run lumi information.
            wmbsFile.update(noDuplicateFiles[f['id']])
            if location:
                wmbsFile['locations'].add(location)
            for r in wmbsFile.pop('newRuns'):
                wmbsFile.addRun(r)
            for parentLFN in parentsByFile.get(f['id'], ()):
                wmbsFile['parents'].add(parentLFN)
            wmbsFile.pop('pnn', None)  # not needed for anything, just remove it
            jobFiles[f['id']] = wmbsFile

        for j in jobList:
            if j['id'] in filesForJobs:
                # Materialise the values so that 'input_files' is always a
                # list, matching the empty default set above.
                j['input_files'] = list(filesForJobs[j['id']].values())

        return jobList
Example #2
0
    def execute(self, jobID, fileSelection=None, conn=None, transaction=False):
        """
        _execute_

        Load the given job(s) together with their input files.

        Runs ``self.sql`` for the jobs and ``self.fileSQL`` for their input
        files, then ``self.parentSQL`` and ``self.getRunLumis`` for the
        distinct files, and finally attaches one ``File`` object per
        (job, file) pair to the job under the ``input_files`` key.

        :param jobID: a single job id, or a list of bind dictionaries that is
            handed to the queries unchanged (presumably ``{"jobid": <id>}``
            entries like the single-id case -- confirm against callers).
            An empty list returns ``[]`` without touching the database.
        :param fileSelection: optional dictionary key'ed by the job id and
            with a list of lfns. When given, only files whose lfn is listed
            for their job are kept; it must contain an entry for every job id
            returned by the file query, otherwise ``KeyError`` is raised.
        :param conn: database connection forwarded to every query.
        :param transaction: transaction flag forwarded to every query.
        :return: list of job dictionaries, each carrying an ``input_files``
            list (empty when no file matched the job).
        """

        if isinstance(jobID, list):
            if not jobID:
                return []
            binds = jobID
        else:
            binds = [{"jobid": jobID}]

        result = self.dbi.processData(self.sql,
                                      binds,
                                      conn=conn,
                                      transaction=transaction)
        jobList = self.formatDict(result)
        for entry in jobList:
            entry.setdefault('input_files', [])

        filesResult = self.dbi.processData(self.fileSQL,
                                           binds,
                                           conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)
        if fileSelection:
            fileList = [
                x for x in fileList if x['lfn'] in fileSelection[x['jobid']]
            ]

        # The same file id can come back in several rows (the loop further
        # down merges repeated rows into one File). Keep the first row per id
        # and build the unique bind list with a dictionary lookup instead of
        # scanning the bind list for every row.
        noDuplicateFiles = {}
        fileBinds = []
        for x in fileList:
            if x['id'] not in noDuplicateFiles:
                noDuplicateFiles[x['id']] = x
                fileBinds.append({'fileid': x['id']})

        # Parent lfns indexed by child file id, so that the per-file loop
        # below does not have to rescan the whole parent result every time.
        parentsByFile = {}
        if fileBinds:
            parentResult = self.dbi.processData(self.parentSQL,
                                                fileBinds,
                                                conn=conn,
                                                transaction=transaction)
            for entry in self.formatDict(parentResult):
                parentsByFile.setdefault(entry['id'], []).append(entry['lfn'])

            # only upload to not duplicate files to prevent excessive memory.
            # A real list is passed because dict.values() is only a view on
            # Python 3. getRunLumis is defined outside this method; the loop
            # below relies on it having set 'newRuns' on each of these rows.
            self.getRunLumis(fileBinds, list(noDuplicateFiles.values()), conn,
                             transaction)

        filesForJobs = {}
        for f in fileList:
            jobFiles = filesForJobs.setdefault(f['jobid'], {})
            # file might not have a valid location: the column can be
            # missing or Null. Checking only for the key would put None
            # into the locations set, so test the value instead.
            location = f.get('pnn')

            if f['id'] in jobFiles:
                # If the file is there and it has a location, just add it
                if location:
                    jobFiles[f['id']]['locations'].add(location)
                continue

            wmbsFile = File(id=f['id'])
            # need to update with noDuplicateFiles since this one has run lumi information.
            wmbsFile.update(noDuplicateFiles[f['id']])
            if location:
                wmbsFile['locations'].add(location)
            for r in wmbsFile.pop('newRuns'):
                wmbsFile.addRun(r)
            for parentLFN in parentsByFile.get(f['id'], ()):
                wmbsFile['parents'].add(parentLFN)
            wmbsFile.pop('pnn', None)  # not needed for anything
            jobFiles[f['id']] = wmbsFile

        for j in jobList:
            if j['id'] in filesForJobs:
                # Materialise the values so that 'input_files' is always a
                # list, matching the empty default set above.
                j['input_files'] = list(filesForJobs[j['id']].values())

        return jobList