Example #1
0
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if type(file["locations"]) == set:
            seName = list(file["locations"])[0]
        elif type(file["locations"]) == list:
            if len(file['locations']) > 1:
                logging.error("Have more then one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            seName = file["locations"][0]
        else:
            seName = file["locations"]

        wmbsFile["locations"] = set()

        if seName != None:
            wmbsFile.setLocation(se = seName, immediateSave = False)
        wmbsFile['jid'] = jobID
        self.wmbsFilesToBuild.append(wmbsFile)

        return wmbsFile
Example #2
0
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            pnn = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error("Have more then one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            pnn = file["locations"][0]
        else:
            pnn = file["locations"]

        wmbsFile["locations"] = set()

        if pnn is not None:
            wmbsFile.setLocation(pnn = pnn, immediateSave = False)
        wmbsFile['jid'] = jobID
        
        return wmbsFile
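
The two variants above differ mainly in which location attribute they forward: the older one passes a storage element name (se), the newer one a PNN. A minimal usage sketch follows, under stated assumptions: `builder` stands for an instance of the class defining the method, and the dictionary keys and site name are illustrative, not taken from the examples.

    # Hypothetical caller; `builder`, the dictionary keys and the site name
    # are assumptions made for illustration only.
    dsFile = {"lfn": "/store/unmerged/example.root",
              "size": 1024,
              "events": 100,
              "locations": set(["T2_XX_SiteA"])}
    wmbsFile = builder.createFileFromDataStructsFile(file=dsFile, jobID=42)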
Example #3
0
    def loadFiles(self, size=10):
        """
        _loadFiles_

        Grab up to `size` files from the current result proxy.
        A proxy that runs out of results is removed from self.proxies,
        so later calls move on to the next one.
        """

        if len(self.proxies) < 1:
            # Well, you don't have any proxies.
            # This is what happens when you ran out of files last time
            logging.info("No additional files found; Ending.")
            return set()

        resultProxy = self.proxies[0]
        rawResults = []
        if isinstance(resultProxy.keys, list):
            keys = resultProxy.keys
        else:
            keys = resultProxy.keys()
            if isinstance(keys, set):
                # If keys() returned a set, turn it into a list
                keys = list(keys)
        files = set()

        while len(rawResults) < size and len(self.proxies) > 0:
            length = size - len(rawResults)
            newResults = resultProxy.fetchmany(size=length)
            rawResults.extend(newResults)
            if len(newResults) < length:
                # Assume this proxy is exhausted: drop it and stop fetching,
                # so it cannot be removed a second time on a later pass.
                self.proxies.remove(resultProxy)
                break

        if not rawResults:
            # Nothing to do
            return set()

        fileList = self.formatDict(results=rawResults, keys=keys)
        fileIDs = list(set([x["fileid"] for x in fileList]))

        myThread = threading.currentThread()
        fileInfoAct = self.daoFactory(classname="Files.GetForJobSplittingByID")
        fileInfoDict = fileInfoAct.execute(file=fileIDs, conn=myThread.transaction.conn, transaction=True)

        getLocAction = self.daoFactory(classname="Files.GetLocationBulk")
        getLocDict = getLocAction.execute(files=fileIDs, conn=myThread.transaction.conn, transaction=True)

        for fID in fileIDs:
            fl = WMBSFile(id=fID)
            fl.update(fileInfoDict[fID])
            locations = getLocDict.get(fID, [])
            for loc in locations:
                fl.setLocation(loc, immediateSave=False)
            files.add(fl)

        return files
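
A hedged usage sketch for loadFiles(): the caller is expected to keep asking for batches until an empty set comes back, which signals that every result proxy has been drained. `loader` is an assumed instance of the class defining loadFiles(); it is not part of the example above.

    # Drain the loader in batches of 100 (illustrative only).
    allFiles = set()
    while True:
        batch = loader.loadFiles(size=100)   # set of WMBSFile objects
        if not batch:
            break                            # no proxies / no rows left
        allFiles.update(batch)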
Example #4
0
    def filesOfStatus(self,
                      status,
                      limit=0,
                      loadChecksums=True,
                      doingJobSplitting=False):
        """
        _filesOfStatus_

        Return a Set of File objects that have the given status with respect
        to this subscription.
        """
        existingTransaction = self.beginTransaction()

        status = status.title()
        files = set()
        if limit > 0:
            action = self.daofactory(
                classname="Subscriptions.Get%sFilesByLimit" % status)
            fileList = action.execute(self["id"],
                                      limit,
                                      conn=self.getDBConn(),
                                      transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="Subscriptions.Get%sFiles" %
                                     status)
            fileList = action.execute(self["id"],
                                      conn=self.getDBConn(),
                                      transaction=self.existingTransaction())

        if doingJobSplitting:
            fileInfoAct = self.daofactory(
                classname="Files.GetForJobSplittingByID")
        else:
            fileInfoAct = self.daofactory(classname="Files.GetByID")

        fileInfoDict = fileInfoAct.execute(
            file=[x["file"] for x in fileList],
            conn=self.getDBConn(),
            transaction=self.existingTransaction())

        # Run through all files
        for f in fileList:
            fl = File(id=f['file'])
            if loadChecksums:
                fl.loadChecksum()
            fl.update(fileInfoDict[f['file']])
            if 'locations' in f:
                fl.setLocation(f['locations'], immediateSave=False)
            files.add(fl)

        self.commitTransaction(existingTransaction)
        return files
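
A sketch of how filesOfStatus() might be called, under stated assumptions: `subscription` is an instance of the subscription class this method belongs to, and "available" is assumed to title-case into one of the status names the Get%sFiles DAOs understand.

    # Hypothetical call; the subscription object and the status name are
    # assumptions for illustration.
    availableFiles = subscription.filesOfStatus(status="available",
                                                limit=1000,
                                                loadChecksums=False,
                                                doingJobSplitting=True)
    for wmbsFile in availableFiles:
        print(wmbsFile["id"])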
Example #5
0
    def filesOfStatus(self, status, limit = 0, loadChecksums = True, doingJobSplitting = False):
        """
        _filesOfStatus_
        
        Return a Set of File objects that have the given status with respect
        to this subscription.        
        """
        existingTransaction = self.beginTransaction()
        
        status = status.title()
        files  = set()
        if limit > 0:
            action = self.daofactory(classname = "Subscriptions.Get%sFilesByLimit" % status)
            fileList = action.execute(self["id"], limit, conn = self.getDBConn(),
                                      transaction = self.existingTransaction())
        else:
            action = self.daofactory(classname = "Subscriptions.Get%sFiles" % status)
            fileList = action.execute(self["id"], conn = self.getDBConn(),
                                      transaction = self.existingTransaction())

        if doingJobSplitting:
            fileInfoAct  = self.daofactory(classname = "Files.GetForJobSplittingByID")
        else:
            fileInfoAct  = self.daofactory(classname = "Files.GetByID")
            
        fileInfoDict = fileInfoAct.execute(file = [x["file"] for x in fileList],
                                           conn = self.getDBConn(),
                                           transaction = self.existingTransaction())
            
        # Run through all files
        for f in fileList:
            fl = File(id = f['file'])
            if loadChecksums:
                fl.loadChecksum()
            fl.update(fileInfoDict[f['file']])
            if 'locations' in f:
                fl.setLocation(f['locations'], immediateSave = False)
            files.add(fl)
            
        self.commitTransaction(existingTransaction)
        return files
Example #6
0
    def loadFiles(self, size=10):
        """
        _loadFiles_

        Grab up to `size` files from the current result proxy.
        A proxy that runs out of results is removed from self.proxies,
        so later calls move on to the next one.
        """

        if len(self.proxies) < 1:
            # Well, you don't have any proxies.
            # This is what happens when you ran out of files last time
            logging.info("No additional files found; Ending.")
            return set()

        resultProxy = self.proxies[0]
        rawResults = []
        if isinstance(resultProxy.keys, list):
            keys = resultProxy.keys
        else:
            keys = resultProxy.keys()
            if isinstance(keys, set):
                # If keys() returned a set, turn it into a list
                keys = list(keys)
        files = set()

        while len(rawResults) < size and len(self.proxies) > 0:
            length = size - len(rawResults)
            newResults = resultProxy.fetchmany(size=length)
            rawResults.extend(newResults)
            if len(newResults) < length:
                # Assume this proxy is exhausted: drop it and stop fetching,
                # so it cannot be removed a second time on a later pass.
                self.proxies.remove(resultProxy)
                break

        if not rawResults:
            # Nothing to do
            return set()

        fileList = self.formatDict(results=rawResults, keys=keys)
        fileIDs = list(set([x['fileid'] for x in fileList]))

        myThread = threading.currentThread()
        fileInfoAct = self.daoFactory(classname="Files.GetForJobSplittingByID")
        fileInfoDict = fileInfoAct.execute(file=fileIDs,
                                           conn=myThread.transaction.conn,
                                           transaction=True)

        getLocAction = self.daoFactory(classname="Files.GetLocationBulk")
        getLocDict = getLocAction.execute(files=fileIDs,
                                          conn=myThread.transaction.conn,
                                          transaction=True)

        for fID in fileIDs:
            fl = WMBSFile(id=fID)
            fl.update(fileInfoDict[fID])
            locations = getLocDict.get(fID, [])
            for loc in locations:
                fl.setLocation(loc, immediateSave=False)
            files.add(fl)

        return files
Example #7
0
    def execute(self, jobID, fileSelection = None,
                conn = None, transaction = False):
        """
        _execute_

        Execute the SQL for the given job ID and then format and return
        the result.
        """

        if isinstance(jobID, list):
            if len(jobID) < 1:
                # Nothing to do
                return []
            binds = jobID
        else:
            binds = {"jobid": jobID}

        result = self.dbi.processData(self.sql, binds, conn = conn,
                                      transaction = transaction)

        jobList = self.formatJobs(result)

        filesResult = self.dbi.processData(self.fileSQL, binds, conn = conn,
                                           transaction = transaction)
        fileList  = self.formatDict(filesResult)
        fileBinds = []
        if fileSelection:
            fileList = [x for x in fileList if x['lfn'] in fileSelection[x['jobid']]]
        for x in fileList:
            # Add new runs
            x['newRuns'] = []
            # Assemble unique list of binds
            if {'fileid': x['id']} not in fileBinds:
                fileBinds.append({'fileid': x['id']})

        parentList = []
        if len(fileBinds) > 0:
            parentResult = self.dbi.processData(self.parentSQL, fileBinds, conn = conn,
                                                transaction = transaction)
            parentList   = self.formatDict(parentResult)

            lumiResult = self.dbi.processData(self.runLumiSQL, fileBinds, conn = conn,
                                              transaction = transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if l['fileid'] not in lumiDict:
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict:
                    for l in lumiDict[f['id']]:
                        run  = l['run']
                        lumi = l['lumi']
                        try:
                            fileRuns[run].append(lumi)
                        except KeyError:
                            fileRuns[run] = []
                            fileRuns[run].append(lumi)

                for r in fileRuns.keys():
                    newRun = Run(runNumber = r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if jobid not in filesForJobs:
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid]:
                wmbsFile = File(id = f['id'])
                wmbsFile.update(f)
                wmbsFile['locations'].add(f['se_name'])
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                for entry in parentList:
                    if entry['id'] == f['id']:
                        wmbsFile['parents'].add(entry['lfn'])
                filesForJobs[jobid][f['id']] = wmbsFile
            else:
                # If the file is there, just add the location
                filesForJobs[jobid][f['id']]['locations'].add(f['se_name'])

        for j in jobList:
            if j['id'] in filesForJobs:
                j['input_files'] = filesForJobs[j['id']].values()

        return jobList
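
A usage sketch for the execute() above. When jobID is a list it is passed straight through as the bind list, so each element must already have the {'jobid': ...} shape; fileSelection, when supplied, maps a job id to the LFNs that should be kept for it. The DAO handle and the LFN strings below are assumptions, not part of the example.

    # Hypothetical DAO instance and LFNs, shown only to illustrate the
    # expected shapes of jobID and fileSelection.
    binds = [{"jobid": 1001}, {"jobid": 1002}]
    selection = {1001: ["/store/data/run1/file_a.root"],
                 1002: ["/store/data/run1/file_b.root"]}
    jobs = loadJobsDAO.execute(jobID=binds, fileSelection=selection)
    for job in jobs:
        print(job["id"], len(job.get("input_files", [])))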
Example #8
0
    def execute(self, jobID, conn=None, transaction=False):
        """
        _execute_

        Execute the SQL for the given job ID and then format and return
        the result.
        """

        if isinstance(jobID, list):
            if len(jobID) < 1:
                # Nothing to do
                return []
            binds = jobID
        else:
            binds = {"jobid": jobID}

        result = self.dbi.processData(self.sql,
                                      binds,
                                      conn=conn,
                                      transaction=transaction)

        jobList = self.formatJobs(result)

        filesResult = self.dbi.processData(self.fileSQL,
                                           binds,
                                           conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)
        fileBinds = []
        for x in fileList:
            # Add new runs
            x['newRuns'] = []
            # Assemble unique list of binds
            if {'fileid': x['id']} not in fileBinds:
                fileBinds.append({'fileid': x['id']})

        parentList = []
        if len(fileBinds) > 0:
            parentResult = self.dbi.processData(self.parentSQL,
                                                fileBinds,
                                                conn=conn,
                                                transaction=transaction)
            parentList = self.formatDict(parentResult)

            lumiResult = self.dbi.processData(self.runLumiSQL,
                                              fileBinds,
                                              conn=conn,
                                              transaction=transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if l['fileid'] not in lumiDict:
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict:
                    for l in lumiDict[f['id']]:
                        run = l['run']
                        lumi = l['lumi']
                        try:
                            fileRuns[run].append(lumi)
                        except KeyError:
                            fileRuns[run] = []
                            fileRuns[run].append(lumi)

                for r in fileRuns.keys():
                    newRun = Run(runNumber=r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if jobid not in filesForJobs:
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid]:
                wmbsFile = File(id=f['id'])
                wmbsFile.update(f)
                wmbsFile['locations'].add(f['se_name'])
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                for entry in parentList:
                    if entry['id'] == f['id']:
                        wmbsFile['parents'].add(entry['lfn'])
                filesForJobs[jobid][f['id']] = wmbsFile
            else:
                # If the file is there, just add the location
                filesForJobs[jobid][f['id']]['locations'].add(f['se_name'])

        for j in jobList:
            if j['id'] in filesForJobs:
                j['input_files'] = filesForJobs[j['id']].values()

        return jobList
Example #9
0
    def execute(self, jobID, fileSelection=None,
                conn=None, transaction=False):
        """
        _execute_

        Execute the SQL for the given job ID and then format and return
        the result.
        fileSelection is a dictionary keyed by job id, each value being the
        list of LFNs to keep for that job
        """

        if isinstance(jobID, list) and not len(jobID):
            return []
        elif isinstance(jobID, list):
            binds = jobID
        else:
            binds = [{"jobid": jobID}]

        result = self.dbi.processData(self.sql, binds, conn=conn,
                                      transaction=transaction)
        jobList = self.formatDict(result)
        for entry in jobList:
            entry.setdefault('input_files', [])

        filesResult = self.dbi.processData(self.fileSQL, binds, conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)

        noDuplicateFiles = {}
        fileBinds = []
        if fileSelection:
            fileList = [x for x in fileList if x['lfn'] in fileSelection[x['jobid']]]
        for x in fileList:
            # Assemble unique list of binds
            if {'fileid': x['id']} not in fileBinds:
                fileBinds.append({'fileid': x['id']})
                noDuplicateFiles[x['id']] = x

        parentList = []
        if len(fileBinds) > 0:
            parentResult = self.dbi.processData(self.parentSQL, fileBinds, conn=conn,
                                                transaction=transaction)
            parentList = self.formatDict(parentResult)

            # only load run/lumi information for the de-duplicated files, to prevent excessive memory use
            self.getRunLumis(fileBinds, noDuplicateFiles.values(), conn, transaction)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            filesForJobs.setdefault(jobid, {})

            if f['id'] not in filesForJobs[jobid]:
                wmbsFile = File(id=f['id'])
                # need to update with noDuplicateFiles since this one has run lumi information.

                wmbsFile.update(noDuplicateFiles[f["id"]])
                if f['pnn']:  # file might not have a valid location, or be Null
                    wmbsFile['locations'].add(f['pnn'])
                for r in wmbsFile.pop('newRuns'):
                    wmbsFile.addRun(r)
                for entry in parentList:
                    if entry['id'] == f['id']:
                        wmbsFile['parents'].add(entry['lfn'])
                wmbsFile.pop('pnn', None)  # not needed for anything, just remove it
                filesForJobs[jobid][f['id']] = wmbsFile
            elif f['pnn']:
                # If the file is there and it has a location, just add it
                filesForJobs[jobid][f['id']]['locations'].add(f['pnn'])

        for j in jobList:
            if j['id'] in filesForJobs:
                j['input_files'] = filesForJobs[j['id']].values()

        return jobList
Example #10
0
    def execute(self, jobID, fileSelection=None, conn=None, transaction=False):
        """
        _execute_

        Execute the SQL for the given job ID and then format and return
        the result.
        fileSelection is a dictionary keyed by job id, each value being the
        list of LFNs to keep for that job
        """

        if isinstance(jobID, list) and not len(jobID):
            return []
        elif isinstance(jobID, list):
            binds = jobID
        else:
            binds = [{"jobid": jobID}]

        result = self.dbi.processData(self.sql,
                                      binds,
                                      conn=conn,
                                      transaction=transaction)
        jobList = self.formatDict(result)
        for entry in jobList:
            entry.setdefault('input_files', [])

        filesResult = self.dbi.processData(self.fileSQL,
                                           binds,
                                           conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)

        noDuplicateFiles = {}
        fileBinds = []
        if fileSelection:
            fileList = [
                x for x in fileList if x['lfn'] in fileSelection[x['jobid']]
            ]
        for x in fileList:
            # Assemble unique list of binds
            if {'fileid': x['id']} not in fileBinds:
                fileBinds.append({'fileid': x['id']})
                noDuplicateFiles[x['id']] = x

        parentList = []
        if len(fileBinds) > 0:
            parentResult = self.dbi.processData(self.parentSQL,
                                                fileBinds,
                                                conn=conn,
                                                transaction=transaction)
            parentList = self.formatDict(parentResult)

            # only load run/lumi information for the de-duplicated files, to prevent excessive memory use
            self.getRunLumis(fileBinds, noDuplicateFiles.values(), conn,
                             transaction)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            filesForJobs.setdefault(jobid, {})

            if f['id'] not in filesForJobs[jobid]:
                wmbsFile = File(id=f['id'])
                # need to update with noDuplicateFiles since this one has run lumi information.

                wmbsFile.update(noDuplicateFiles[f["id"]])
                if 'pnn' in f:  # file might not have a valid location
                    wmbsFile['locations'].add(f['pnn'])
                for r in wmbsFile.pop('newRuns'):
                    wmbsFile.addRun(r)
                for entry in parentList:
                    if entry['id'] == f['id']:
                        wmbsFile['parents'].add(entry['lfn'])
                wmbsFile.pop('pnn', None)  # not needed for anything
                filesForJobs[jobid][f['id']] = wmbsFile
            elif 'pnn' in f:
                # If the file is there and it has a location, just add it
                filesForJobs[jobid][f['id']]['locations'].add(f['pnn'])

        for j in jobList:
            if j['id'] in filesForJobs:
                j['input_files'] = filesForJobs[j['id']].values()

        return jobList
Example #11
0
    def execute(self, jobID, conn = None, transaction = False):
        """
        _execute_

        Execute the SQL for the given job ID(s) and then format and return
        the result.
        """

        if not isinstance(jobID, list):
            jobID = [jobID]

        binds = [{"jobid": x} for x in jobID]

        if not binds:
            return []

        #First load full file information with run/lumis
        filesResult = self.dbi.processData(self.fileSQL, binds, conn = conn,
                                           transaction = transaction)
        fileList = self.formatDict(filesResult)

        #Clear duplicates
        bindDict = {}
        for result in fileList:
            bindDict[result['id']] = 1
            result['newRuns'] = []
        fileBinds = [{'fileid' : x} for x in bindDict.keys()]

        #Load file information
        if len(fileBinds):
            lumiResult = self.dbi.processData(self.runLumiSQL, fileBinds, conn = conn,
                                              transaction = transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if l['fileid'] not in lumiDict:
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict:
                    for l in lumiDict[f['id']]:
                        run = l['run']
                        lumi = l['lumi']
                        try:
                            fileRuns[run].append(lumi)
                        except KeyError:
                            fileRuns[run] = []
                            fileRuns[run].append(lumi)

                for r in fileRuns.keys():
                    newRun = Run(runNumber = r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if jobid not in filesForJobs:
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid]:
                wmbsFile = File(id = f['id'])
                wmbsFile.update(f)
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                filesForJobs[jobid][f['id']] = wmbsFile


        #Add the file information to job objects and load the masks
        jobList = [Job(id = x) for x in jobID]
        for j in jobList:
            if j['id'] in filesForJobs:
                j['input_files'] = filesForJobs[j['id']].values()
            j['mask'].load(j['id'])

        return jobList
Example #12
0
    def execute(self, jobID, conn=None, transaction=False):
        """
        _execute_

        Execute the SQL for the given job ID(s) and then format and return
        the result.
        """

        if not isinstance(jobID, list):
            jobID = [jobID]

        binds = [{"jobid": x} for x in jobID]

        if not binds:
            return []

        #First load full file information with run/lumis
        filesResult = self.dbi.processData(self.fileSQL,
                                           binds,
                                           conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)

        #Clear duplicates
        bindDict = {}
        for result in fileList:
            bindDict[result['id']] = 1
            result['newRuns'] = []
        fileBinds = [{'fileid': x} for x in bindDict.keys()]

        #Load file information
        if len(fileBinds):
            lumiResult = self.dbi.processData(self.runLumiSQL,
                                              fileBinds,
                                              conn=conn,
                                              transaction=transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if l['fileid'] not in lumiDict:
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict:
                    for l in lumiDict[f['id']]:
                        run = l['run']
                        lumi = l['lumi']
                        try:
                            fileRuns[run].append(lumi)
                        except KeyError:
                            fileRuns[run] = []
                            fileRuns[run].append(lumi)

                for r in fileRuns.keys():
                    newRun = Run(runNumber=r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if jobid not in filesForJobs:
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid]:
                wmbsFile = File(id=f['id'])
                wmbsFile.update(f)
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                filesForJobs[jobid][f['id']] = wmbsFile

        #Add the file information to job objects and load the masks
        jobList = [Job(id=x) for x in jobID]
        for j in jobList:
            if j['id'] in filesForJobs:
                j['input_files'] = filesForJobs[j['id']].values()
            j['mask'].load(j['id'])

        return jobList
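
Several execute() variants above rebuild Run objects by grouping (run, lumi) rows per file. The same pattern, written standalone with a defaultdict, is sketched below; the rows and the import path are assumptions made so the snippet is self-contained, and it illustrates the technique rather than reproducing code from the examples.

    from collections import defaultdict

    # Assumed import path for the Run class the examples already use.
    from WMCore.DataStructs.Run import Run

    # Rows shaped like the formatDict() output used above; values invented.
    lumiRows = [{"fileid": 7, "run": 250000, "lumi": 12},
                {"fileid": 7, "run": 250000, "lumi": 13},
                {"fileid": 7, "run": 250001, "lumi": 1}]

    # Group lumis by (file, run), then build one Run object per pair.
    lumisByFileRun = defaultdict(list)
    for row in lumiRows:
        lumisByFileRun[(row["fileid"], row["run"])].append(row["lumi"])

    newRunsByFile = defaultdict(list)
    for (fileid, runNumber), lumis in lumisByFileRun.items():
        newRun = Run(runNumber=runNumber)
        newRun.lumis = lumis
        newRunsByFile[fileid].append(newRun)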