Example #1
    def loadFilesByBlock(self, blockname):
        """
        _loadFilesByBlock_

        Get all files associated with a block
        """
        dbsFiles = []

        findFiles = self.daoFactory(classname = "LoadFilesByBlock")
        results = findFiles.execute(blockname = blockname, transaction = False)

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        return dbsFiles
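The runInfo/parentLFNs post-processing in the second loop recurs almost verbatim across the examples below. Here is a minimal, self-contained sketch of the same transformation, using a plain dict and a simplified Run stand-in (not the real WMCore classes) and made-up row data:

# Simplified stand-in for WMCore.DataStructs.Run: a list of lumis plus a
# run number attribute.
class Run(list):
    def __init__(self, runNumber=None, *lumis):
        super(Run, self).__init__(lumis)
        self.run = runNumber

def expandRunInfo(dbsfile):
    """Replace the raw 'runInfo' dict with Run objects, in place."""
    runs = []
    for runNumber, lumis in dbsfile.pop('runInfo', {}).items():
        run = Run(runNumber)
        run.extend(lumis)  # lumis arrive as a plain list of ints
        runs.append(run)
    dbsfile['runs'] = runs
    return dbsfile

# Hypothetical DAO-style row: run 1000 with three lumis.
print(expandRunInfo({'lfn': '/store/t/file.root',
                     'runInfo': {1000: [1, 2, 3]}}))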
Example #2
    def findUploadableFilesByDAS(self, datasetpath):
        """
        _findUploadableFilesByDAS_

        Find all the uploadable files for a given DatasetPath.
        """
        dbsFiles = []

        findFiles = self.daoFactory(classname = "LoadDBSFilesByDAS")
        results = findFiles.execute(datasetpath = datasetpath, transaction = False)

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        return dbsFiles
Example #3
    def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize = 100,
                      user = "******", group = "cmsdataops"):
        """
        _getChunkFiles_

        Retrieve a chunk of files from the given collection and task.
        """
        chunkFiles = []
        result = self.couchdb.loadView("ACDC", "owner_coll_fileset_files",
                                       {"startkey": [group, user,
                                                     collectionName, filesetName],
                                        "endkey": [group, user,
                                                   collectionName, filesetName, {}],
                                        "limit": chunkSize,
                                        "skip": chunkOffset,
                                        }, [])

        for row in result["rows"]:
            resultRow = row['value']
            newFile = File(lfn = resultRow["lfn"], size = resultRow["size"],
                           events = resultRow["events"], parents = set(resultRow["parents"]),
                           locations = set(resultRow["locations"]), merged = resultRow["merged"])
            for run in resultRow["runs"]:
                newRun = Run(run["run_number"])
                newRun.extend(run["lumis"])
                newFile.addRun(newRun)

            chunkFiles.append(newFile)

        return chunkFiles
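getChunkFiles() pages through the couch view with skip/limit, so a caller can walk an entire fileset chunk by chunk. A hedged sketch of such a pagination loop, assuming only the method signature above (`svc` stands for whatever object exposes getChunkFiles):

# Hypothetical pagination helper built on the getChunkFiles() signature.
def iterateCollectionFiles(svc, collectionName, filesetName, chunkSize=100):
    """Yield every file in the fileset, one chunk at a time."""
    offset = 0
    while True:
        chunk = svc.getChunkFiles(collectionName, filesetName,
                                  chunkOffset=offset, chunkSize=chunkSize)
        if not chunk:
            break
        for acdcFile in chunk:
            yield acdcFile
        offset += chunkSize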
Example #4
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=None):
    """
    _injectFilesFromDBS_

    Inject files from the given dataset into the input fileset, optionally
    keeping only runs in the whitelist.
    """
    runsWhiteList = runsWhiteList or []
    print("injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFileArray(path=datasetPath, retriveList=["retrive_lumi", "retrive_run"])
    print("  found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        if runsWhiteList and str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList:
            continue
        myFile = File(lfn=dbsResult["LogicalFileName"], size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"], checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov", merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.appendLumi(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
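Note that the whitelist check above consults only the run number of the file's first LumiList entry. A tiny sketch of that predicate in isolation, with made-up values:

# Made-up values illustrating the run whitelist predicate: only the run
# number of the first lumi entry is checked.
runsWhiteList = ["1234"]
dbsResult = {"LumiList": [{"RunNumber": 1234, "LumiSectionNumber": 1},
                          {"RunNumber": 1234, "LumiSectionNumber": 2}]}
skip = bool(runsWhiteList) and \
       str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList
print(skip)  # False -> the file is kept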
Example #5
    def getRunLumis(self, fileBinds, fileList,
                    conn=None, transaction=False):
        """
        _getRunLumis_

        Fetch run/lumi/events information for each file and append Run objects
        to the files information.
        """
        lumiResult = self.dbi.processData(self.runLumiSQL, fileBinds, conn=conn,
                                          transaction=transaction)
        lumiList = self.formatDict(lumiResult)

        lumiDict = {}
        for l in lumiList:
            lumiDict.setdefault(l['fileid'], [])
            lumiDict[l['fileid']].append(l)

        for f in fileList:
            # Add new runs
            f.setdefault('newRuns', [])

            fileRuns = {}
            if f['id'] in lumiDict.keys():
                for l in lumiDict[f['id']]:
                    run = l['run']
                    lumi = l['lumi']
                    numEvents = l['num_events']
                    fileRuns.setdefault(run, [])
                    fileRuns[run].append((lumi, numEvents))

            for r in fileRuns.keys():
                newRun = Run(runNumber=r)
                newRun.lumis = fileRuns[r]
                f['newRuns'].append(newRun)
        return
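The grouping above happens in two steps: flat DAO rows are first bucketed by file id, then each file's rows are bucketed by run. The same two steps in isolation, on hypothetical rows:

# Hypothetical flat rows, shaped like formatDict() output.
lumiList = [
    {'fileid': 1, 'run': 100, 'lumi': 1, 'num_events': 50},
    {'fileid': 1, 'run': 100, 'lumi': 2, 'num_events': 75},
    {'fileid': 1, 'run': 101, 'lumi': 1, 'num_events': 10},
]

# Step 1: bucket rows per file id.
lumiDict = {}
for l in lumiList:
    lumiDict.setdefault(l['fileid'], []).append(l)

# Step 2: bucket one file's rows per run as (lumi, num_events) pairs.
fileRuns = {}
for l in lumiDict[1]:
    fileRuns.setdefault(l['run'], []).append((l['lumi'], l['num_events']))

print(fileRuns)  # {100: [(1, 50), (2, 75)], 101: [(1, 10)]}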
Example #6
    def prepareDBSFiles(self):
        """
        _prepareDBSFiles_

        Retrieve the information from the JSON input data
        and create DBSFile objects that can be registered in the
        database.
        """
        timestamp = time.strftime('%m%d%y_%H%M%S')
        for fileEntry in self.inputData:
            # Get all the info out of a standard named dataset
            datasetInfo = str(fileEntry["dataset"])
            tokens = datasetInfo.split('/')
            primDs = tokens[1]
            procDs = tokens[2]
            dataTier = tokens[3]
            procDsTokens = procDs.split('-')
            acqEra = procDsTokens[0]
            procVer = procDsTokens[-1][1:]

            ckSumInfo = fileEntry["checksums"]
            for entry in ckSumInfo:
                ckSumInfo[entry] = str(ckSumInfo[entry])

            # Build the basic dbsBuffer file
            dbsFile = DBSBufferFile(lfn = str(fileEntry["lfn"]),
                                    size = int(fileEntry.get("size", 0)),
                                    events = int(fileEntry.get("events", 0)),
                                    checksums = ckSumInfo,
                                    status = "NOTUPLOADED")
            dbsFile.setAlgorithm(appName = "cmsRun",
                                 appVer = str(fileEntry.get("cmssw", "LEGACY")),
                                 appFam = "Legacy",
                                 psetHash = "GIBBERISH",
                                 configContent = "None;;None;;None")

            dbsFile.setDatasetPath("/%s/%s/%s" % (primDs,
                                                  procDs,
                                                  dataTier))
            dbsFile.setValidStatus(validStatus = "PRODUCTION")
            dbsFile.setProcessingVer(ver = procVer)
            dbsFile.setAcquisitionEra(era = acqEra)
            dbsFile.setGlobalTag(globalTag = str(fileEntry.get('globalTag', "LEGACY")))

            # Build a representative task name
            dbsFile['task'] = '/LegacyInsertionTask_%s/Insertion' % timestamp

            # Get the runs and lumis
            runsAndLumis = fileEntry.get("runsAndLumis", {})
            for run in runsAndLumis:
                newRun = Run(runNumber = int(run))
                newRun.extend([int(x) for x in runsAndLumis[run]])
                dbsFile.addRun(newRun)

            # Complete the file information with the location and queue it
            dbsFile.setLocation(se = str(fileEntry["location"]), immediateSave = False)
            self.dbsFilesToCreate.append(dbsFile)
        self.inputData = None
        return
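The dataset-path parsing at the top of this method assumes the standard /PrimaryDataset/AcquisitionEra-ProcessingString-vN/DataTier layout. A standalone illustration with a made-up dataset name:

# Made-up dataset path, decomposed the same way prepareDBSFiles() does it.
datasetInfo = '/PrimaryDS/AcqEra2011-FilterName-v2/AOD'
tokens = datasetInfo.split('/')  # ['', primary, processed, tier]
primDs, procDs, dataTier = tokens[1], tokens[2], tokens[3]
procDsTokens = procDs.split('-')
acqEra = procDsTokens[0]        # 'AcqEra2011'
procVer = procDsTokens[-1][1:]  # strip the leading 'v' -> '2'
print(primDs, dataTier, acqEra, procVer)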
Example #7
    def loadDBSBufferFilesBulk(self, fileObjs):
        """
        _loadDBSBufferFilesBulk_

        Yes, this is a stupid place to put it.
        No, there's no better place.
        """
        myThread = threading.currentThread()

        dbsFiles = []

        existingTransaction = self.beginTransaction()

        factory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                             logger = myThread.logger,
                             dbinterface = myThread.dbi)

        binds = []
        for f in fileObjs:
            binds.append(f["id"])

        loadFiles = factory(classname = "DBSBufferFiles.LoadBulkFilesByID")
        results = loadFiles.execute(files = binds, conn = self.getDBConn(),
                                    transaction = self.existingTransaction())

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        self.commitTransaction(existingTransaction)

        return dbsFiles
Example #8
    def addFileToDBS(self, jobReportFile, task, errorDataset = False):
        """
        _addFileToDBS_

        Add a file that was output from a job to the DBS buffer.
        """
        datasetInfo = jobReportFile["dataset"]

        dbsFile = DBSBufferFile(lfn = jobReportFile["lfn"],
                                size = jobReportFile["size"],
                                events = jobReportFile["events"],
                                checksums = jobReportFile["checksums"],
                                status = "NOTUPLOADED")
        dbsFile.setAlgorithm(appName = datasetInfo["applicationName"],
                             appVer = datasetInfo["applicationVersion"],
                             appFam = jobReportFile["module_label"],
                             psetHash = "GIBBERISH",
                             configContent = jobReportFile.get('configURL'))

        if errorDataset:
            dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"] + "-Error",
                                                  datasetInfo["processedDataset"],
                                                  datasetInfo["dataTier"]))
        else:
            dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"],
                                                  datasetInfo["processedDataset"],
                                                  datasetInfo["dataTier"]))

        dbsFile.setValidStatus(validStatus = jobReportFile.get("validStatus", None))
        dbsFile.setProcessingVer(ver = jobReportFile.get('processingVer', None))
        dbsFile.setAcquisitionEra(era = jobReportFile.get('acquisitionEra', None))
        dbsFile.setGlobalTag(globalTag = jobReportFile.get('globalTag', None))
        #TODO need to find where to get the prep id
        dbsFile.setPrepID(prep_id = jobReportFile.get('prep_id', None))
        dbsFile['task'] = task

        for run in jobReportFile["runs"]:
            newRun = Run(runNumber = run.run)
            newRun.extend(run.lumis)
            dbsFile.addRun(newRun)


        dbsFile.setLocation(pnn = list(jobReportFile["locations"])[0], immediateSave = False)
        self.dbsFilesToCreate.append(dbsFile)
        return
Example #9
    def findUploadableFilesByDAS(self, das):
        """
        _findUploadableFilesByDAS_

        Find all the uploadable files for a given Dataset-Algo (DAS)
        combination.
        """

        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        dbsFiles = []

        factory = DAOFactory(package = "WMComponent.DBSUpload.Database",
                             logger = myThread.logger,
                             dbinterface = myThread.dbi)
        findFiles = factory(classname = "LoadDBSFilesByDAS")
        results   = findFiles.execute(das = das,
                                      conn = self.getDBConn(),
                                      transaction=self.existingTransaction())

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        self.commitTransaction(existingTransaction)

        return dbsFiles
Example #10
    def loadDBSBufferFilesBulk(self, fileObjs):
        """
        _loadDBSBufferFilesBulk_

        Yes, this is a stupid place to put it.
        No, there's no better place.
        """

        myThread = threading.currentThread()

        dbsFiles = []

        existingTransaction = self.beginTransaction()

        binds = []
        for f in fileObjs:
            binds.append(f["id"])

        loadFiles = self.daoFactory(classname="DBSBufferFiles.LoadBulkFilesByID")
        results = loadFiles.execute(files=binds, conn=self.getDBConn(), transaction=self.existingTransaction())

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry["id"])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if "runInfo" in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile["runInfo"].keys():
                    run = Run(runNumber=r)
                    run.extend(dbsfile["runInfo"][r])
                    dbsfile.addRun(run)
                del dbsfile["runInfo"]
            if "parentLFNs" in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile["parentLFNs"]:
                    newFile = DBSBufferFile(lfn=lfn)
                    dbsfile["parents"].add(newFile)
                del dbsfile["parentLFNs"]

        self.commitTransaction(existingTransaction)

        return dbsFiles
Example #11
def runHandler():
    """
    _runHandler_

    Sink to add run information to a file.  Given the following XML:
      <Runs>
      <Run ID="122023">
        <LumiSection NEvents="100" ID="215"/>
        <LumiSection NEvents="100" ID="216"/>
      </Run>
      <Run ID="122024">
        <LumiSection ID="1"/>
        <LumiSection ID="2"/>
      </Run>
      </Runs>

    Create a WMCore.DataStructs.Run object for each run and call the
    addRunInfoToFile() function to add the run information to the file
    section.
    """
    while True:
        fileSection, node = (yield)
        for subnode in node.children:
            if subnode.name == "Run":
                runId = subnode.attrs.get("ID", None)
                if runId is None:
                    continue

                lumis = []
                for lumi in subnode.children:
                    if "ID" in lumi.attrs:
                        lumiNumber = int(lumi.attrs['ID'])
                        nEvents = lumi.attrs.get("NEvents", None)
                        if nEvents is not None:
                            try:
                                nEvents = int(nEvents)
                            except ValueError:
                                nEvents = None
                        lumis.append((lumiNumber, nEvents))
                runInfo = Run(runNumber=int(runId))
                runInfo.extendLumis(lumis)

                Report.addRunInfoToFile(fileSection, runInfo)
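runHandler() is a generator-based sink: it must be primed once with next() and is then fed (fileSection, node) pairs through send(). A minimal sketch of that driving pattern, assuming a simple stand-in for the parser's node type:

# Stand-in node type; the real parser supplies objects with the same
# name/attrs/children shape.
class Node(object):
    def __init__(self, name, attrs=None, children=None):
        self.name = name
        self.attrs = attrs or {}
        self.children = children or []

def sink():
    while True:
        fileSection, node = (yield)
        for subnode in node.children:
            print("saw run", subnode.attrs["ID"])

handler = sink()
next(handler)  # prime the coroutine before sending anything
runsNode = Node("Runs", children=[Node("Run", {"ID": "122023"})])
handler.send((None, runsNode))  # prints: saw run 122023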
Example #12
    def loadFilesFromBlocks(self, blockID):
        """
        _loadFilesFromBlocks_

        Load the files from all active blocks
        """
        findFiles = self.factory(classname = "LoadFilesFromBlocks")

        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        dbsFiles = []

        results   = findFiles.execute(blockID = blockID,
                                      conn = self.getDBConn(),
                                      transaction=self.existingTransaction())

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        self.commitTransaction(existingTransaction)

        return dbsFiles
Example #13
    def findUploadableFilesByDAS(self, das):
        """
        _findUploadableFilesByDAS_

        Find all the uploadable files for a given Dataset-Algo (DAS)
        combination.
        """

        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        dbsFiles = []

        findFiles = self.daoFactory(classname="LoadDBSFilesByDAS")
        results = findFiles.execute(das=das, conn=self.getDBConn(), transaction=self.existingTransaction())

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry["id"])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if "runInfo" in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile["runInfo"].keys():
                    run = Run(runNumber=r)
                    run.extend(dbsfile["runInfo"][r])
                    dbsfile.addRun(run)
                del dbsfile["runInfo"]
            if "parentLFNs" in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile["parentLFNs"]:
                    newFile = DBSBufferFile(lfn=lfn)
                    dbsfile["parents"].add(newFile)
                del dbsfile["parentLFNs"]

        self.commitTransaction(existingTransaction)

        return dbsFiles
Example #14
    def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize=100):
        """
        _getChunkFiles_

        Retrieve a chunk of files from the given collection and task.
        """
        chunkFiles = []
        files = self._getFilesetInfo(collectionName, filesetName, chunkOffset, chunkSize)

        files = mergeFakeFiles(files)
        for fileInfo in files:
            newFile = File(lfn=fileInfo["lfn"], size=fileInfo["size"],
                           events=fileInfo["events"], parents=set(fileInfo["parents"]),
                           locations=set(fileInfo["locations"]), merged=fileInfo["merged"])
            for run in fileInfo["runs"]:
                newRun = Run(run["run_number"])
                newRun.extend(run["lumis"])
                newFile.addRun(newRun)

            chunkFiles.append(newFile)

        return chunkFiles
Example #15
    def getFiles(self, name, tier, nFiles = 12, site = "malpaquet"):
        """
        Create some quick dummy test files


        """

        files = []

        for f in range(0, nFiles):
            testFile = DBSBufferFile(lfn = '%s-%s-%i' % (name, site, f), size = 1024,
                                     events = 20, checksums = {'cksum': 1})
            testFile.setAlgorithm(appName = name, appVer = "CMSSW_3_1_1",
                                  appFam = "RECO", psetHash = "GIBBERISH",
                                  configContent = self.configURL)
            testFile.setDatasetPath("/%s/%s/%s" % (name, name, tier))
            testFile.addRun(Run(1, *[f]))
            testFile.setGlobalTag("aGlobalTag")
            testFile.create()
            testFile.setLocation(site)
            files.append(testFile)

        testFileChild = DBSBufferFile(lfn = '%s-%s-child' % (name, site), size = 1024,
                                      events = 10, checksums = {'cksum': 1})
        testFileChild.setAlgorithm(appName = name, appVer = "CMSSW_3_1_1",
                                   appFam = "RECO", psetHash = "GIBBERISH",
                                   configContent = self.configURL)
        testFileChild.setDatasetPath("/%s/%s_2/RECO" % (name, name))
        testFileChild.addRun(Run(1, *[45]))
        testFileChild.setGlobalTag("aGlobalTag")
        testFileChild.create()
        testFileChild.setLocation(site)

        testFileChild.addParents([x['lfn'] for x in files])

        return files
Example #16
    def getRunLumis(self, fileBinds, fileList,
                    conn=None, transaction=False):
        """
        _getRunLumis_

        Fetch run/lumi/events information for each file and append Run objects
        to the files information.
        """
        if not fileBinds:
            return

        lumiResult = self.dbi.processData(self.runLumiSQL, fileBinds, conn=conn,
                                          transaction=transaction)
        lumiList = self.formatDict(lumiResult)

        lumiDict = {}
        for l in lumiList:
            lumiDict.setdefault(l['fileid'], [])
            lumiDict[l['fileid']].append(l)

        for f in fileList:
            # Add new runs
            f.setdefault('newRuns', [])

            fileRuns = {}
            if f['id'] in lumiDict:
                for l in lumiDict[f['id']]:
                    run = l['run']
                    lumi = l['lumi']
                    numEvents = l['num_events']
                    fileRuns.setdefault(run, [])
                    fileRuns[run].append((lumi, numEvents))

            for r in fileRuns:
                newRun = Run(runNumber=r)
                newRun.lumis = fileRuns[r]
                f['newRuns'].append(newRun)
        return
Example #17
    def loadFilesByBlock(self, blockname):
        """
        _loadFilesByBlock_

        Get all files associated with a block
        """

        dbsFiles = []

        existingTransaction = self.beginTransaction()

        findFiles = self.daoFactory(classname="LoadFilesByBlock")
        results = findFiles.execute(blockname=blockname, conn=self.getDBConn(), transaction=self.existingTransaction())

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry["id"])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if "runInfo" in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile["runInfo"].keys():
                    run = Run(runNumber=r)
                    run.extend(dbsfile["runInfo"][r])
                    dbsfile.addRun(run)
                del dbsfile["runInfo"]
            if "parentLFNs" in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile["parentLFNs"]:
                    newFile = DBSBufferFile(lfn=lfn)
                    dbsfile["parents"].add(newFile)
                del dbsfile["parentLFNs"]

        self.commitTransaction(existingTransaction)

        return dbsFiles
Example #18
    def loadDBSBufferFilesBulk(self, fileObjs):
        """
        _loadDBSBufferFilesBulk_

        Yes, this is a stupid place to put it.
        No, there's no better place.
        """
        dbsFiles = []

        binds = []
        for f in fileObjs:
            binds.append(f["id"])

        loadFiles = self.daoFactory(classname = "DBSBufferFiles.LoadBulkFilesByID")
        results = loadFiles.execute(files = binds, transaction = False)

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        return dbsFiles
Example #19
    def createTestJob(self, subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """
        testWorkflow = Workflow(spec=makeUUID(),
                                owner="Simon",
                                name=makeUUID(),
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
Example #20
    def loadDBSBufferFilesBulk(self, fileObjs):
        """
        _loadDBSBufferFilesBulk_

        Yes, this is a stupid place to put it.
        No, there's no better place.
        """
        dbsFiles = []

        binds = []
        for f in fileObjs:
            binds.append(f["id"])

        loadFiles = self.daoFactory(classname = "DBSBufferFiles.LoadBulkFilesByID")
        results = loadFiles.execute(files = binds, transaction = False)

        for entry in results:
            # Add loaded information
            dbsfile = DBSBufferFile(id=entry['id'])
            dbsfile.update(entry)
            dbsFiles.append(dbsfile)

        for dbsfile in dbsFiles:
            if 'runInfo' in dbsfile.keys():
                # Then we have to replace it with a real run
                for r in dbsfile['runInfo'].keys():
                    run = Run(runNumber = r)
                    run.extend(dbsfile['runInfo'][r])
                    dbsfile.addRun(run)
                del dbsfile['runInfo']
            if 'parentLFNs' in dbsfile.keys():
                # Then we have some parents
                for lfn in dbsfile['parentLFNs']:
                    newFile = DBSBufferFile(lfn = lfn)
                    dbsfile['parents'].add(newFile)
                del dbsfile['parentLFNs']

        return dbsFiles
Example #21
    def testSetLocationTransaction(self):
        """
        _testSetLocationTransaction_

        Create a file at specific locations and commit everything to the
        database.  Reload the file from the database and verify that the
        locations are correct.  Rollback the database transaction and once
        again reload the file.  Verify that the original locations are back.
        """
        testFileA = File(lfn="/this/is/a/lfn",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()
        testFileA.setLocation(["se1.fnal.gov"])

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testFileA.setLocation(["se1.cern.ch"])
        testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                              immediateSave=False)

        testFileB = File(id=testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"

        myThread.transaction.rollback()
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"
        return
Example #22
    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs
        """


        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = "Processing",
                                        split_algo = "FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create a job
        for i in range(nJobs):
            testJob = Job(name = '%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup
Example #23
    def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset=True):
        """
        Two assumptions are made for this method to behave properly:
        1. DBS returns only one level of ParentList.
           If DBS returns multiple levels of parentage, they will still be
           handled, but that might not be what we want. In that case,
           restrict it to one level.
        2. Parent files are in the same location as child files.
           This is not true in the general case, but WorkQueue should only
           select work where child and parent files are in the same location.
        """
        wmbsParents = []
        dbsFile.setdefault("ParentList", [])
        for parent in dbsFile["ParentList"]:
            wmbsParents.append(
                self._addDBSFileToWMBSFile(parent,
                                           storageElements,
                                           inFileset=False))

        checksums = {}
        if dbsFile.get('Checksum'):
            checksums['cksum'] = dbsFile['Checksum']
        if dbsFile.get('Adler32'):
            checksums['adler32'] = dbsFile['Adler32']

        wmbsFile = File(
            lfn=dbsFile["LogicalFileName"],
            size=dbsFile["FileSize"],
            events=dbsFile["NumberOfEvents"],
            checksums=checksums,
            #TODO: need to get list of parent lfn
            parents=wmbsParents,
            locations=set(storageElements))

        for lumi in dbsFile['LumiList']:
            run = Run(lumi['RunNumber'], lumi['LumiSectionNumber'])
            wmbsFile.addRun(run)

        self._addToDBSBuffer(dbsFile, checksums, storageElements)

        logging.info("WMBS File: %s\n on Location: %s" %
                     (wmbsFile['lfn'], wmbsFile['newlocations']))

        wmbsFile['inFileset'] = bool(inFileset)

        self.wmbsFilesToCreate.append(wmbsFile)

        return wmbsFile
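For reference, an illustrative and entirely made-up DBS record in the shape this method expects, with exactly one level of ParentList as per assumption 1:

# Hypothetical DBS record; field names follow the keys read above.
dbsFile = {
    "LogicalFileName": "/store/data/child.root",
    "FileSize": 1000,
    "NumberOfEvents": 100,
    "Checksum": "123abc",
    "LumiList": [{"RunNumber": 1, "LumiSectionNumber": 2}],
    "ParentList": [{
        "LogicalFileName": "/store/data/parent.root",
        "FileSize": 2000,
        "NumberOfEvents": 200,
        "LumiList": [{"RunNumber": 1, "LumiSectionNumber": 2}],
        # no nested ParentList: recursion stops after one level
    }],
}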
Example #24
    def getInputFilesFromStep(self, stepName, inputSource=None):
        """
        _getInputFilesFromStep_

        Retrieve a list of input files from the given step.
        """
        step = self.retrieveStep(stepName)

        if inputSource is None:
            inputSources = step.input.listSections_()
        else:
            inputSources = [inputSource]

        inputFiles = []
        for inputSource in inputSources:
            source = getattr(step.input, inputSource)
            for fileNum in range(source.files.fileCount):
                fwjrFile = getattr(source.files, "file%d" % fileNum)

                lfn = getattr(fwjrFile, "lfn", None)
                pfn = getattr(fwjrFile, "pfn", None)
                size = getattr(fwjrFile, "size", 0)
                events = getattr(fwjrFile, "events", 0)
                branches = getattr(fwjrFile, "branches", [])
                catalog = getattr(fwjrFile, "catalog", None)
                guid = getattr(fwjrFile, "guid", None)
                inputSourceClass = getattr(fwjrFile, "input_source_class",
                                           None)
                moduleLabel = getattr(fwjrFile, "module_label", None)
                inputType = getattr(fwjrFile, "input_type", None)

                inputFile = File(lfn=lfn, size=size, events=events)
                inputFile["pfn"] = pfn
                inputFile["branches"] = branches
                inputFile["catalog"] = catalog
                inputFile["guid"] = guid
                inputFile["input_source_class"] = inputSourceClass
                inputFile["module_label"] = moduleLabel
                inputFile["input_type"] = inputType

                runSection = getattr(fwjrFile, "runs")
                runNumbers = runSection.listSections_()

                for runNumber in runNumbers:
                    lumiTuple = getattr(runSection, str(runNumber))
                    inputFile.addRun(Run(int(runNumber), *lumiTuple))

                inputFiles.append(inputFile)

        return inputFiles
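The step sections here are attribute trees that the code reads defensively with getattr() defaults. A tiny sketch of that access pattern, using types.SimpleNamespace as a stand-in for one report file entry:

# SimpleNamespace mimics the attribute-style access of a report section.
from types import SimpleNamespace

fwjrFile = SimpleNamespace(lfn="/store/t/f.root", size=1024)
lfn = getattr(fwjrFile, "lfn", None)     # present -> "/store/t/f.root"
events = getattr(fwjrFile, "events", 0)  # missing -> default 0
print(lfn, events)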
Example #25
    def testA(self):
        """instantiation"""

        run1 = Run()
        self.assertEqual(run1.run, None)
        self.assertEqual(len(run1), 0)


        run2 = Run(1000000)
        self.assertEqual(run2.run, 1000000)
        self.assertEqual(len(run2), 0)

        run3 = Run(1000000, 1)
        self.assertEqual(run3.run, 1000000)
        self.assertEqual(len(run3), 1)
        self.assertEqual(run3[0], 1)


        run4 = Run(1000000, 1,2,3,4,5)
        self.assertEqual(run4.run, 1000000)
        self.assertEqual(len(run4), 5)
        self.assertEqual(run4[0], 1)
        self.assertEqual(run4[4], 5)
Example #26
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s2", seName="somese2.cern.ch")

        fileSite2 = File(lfn="fileSite2",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["somese2.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
               "ERROR: Three jobs should have been returned."

        for job in result[0].jobs:
            firstInputFile = job.getFiles()[0]
            baseLocation = list(firstInputFile["locations"])[0]

            for inputFile in job.getFiles():
                assert len(inputFile["locations"]) == 1, \
                       "Error: Wrong number of locations"

                assert list(inputFile["locations"])[0] == baseLocation, \
                       "Error: Wrong location."

        return
Example #27
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileRAL",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        ralJobs = 0
        fnalJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL"]):
                fnalJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalJobs, 2)

        return
Example #28
    def createFile(self, lfn, events, run, lumis, location):
        """
        _createFile_

        Create a file for testing
        """
        newFile = File(lfn=lfn, size=1000,
                       events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((run * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
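The (run * lumis) + lumi arithmetic keeps the generated lumi numbers distinct for files created with different run numbers. A quick check with made-up inputs:

# With lumis=5, run 2 yields lumis 10..14 and run 3 yields 15..19, so
# files built for different runs never share a lumi number.
lumis = 5
for run in (2, 3):
    print(run, [(run * lumis) + lumi for lumi in range(lumis)])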
Example #29
    def getOutputFile(self, fileName, outputModule, step):
        """
        _getOutputFile_

        Takes a fileRef object and returns a DataStructs/File object as output
        """

        outputMod = self.getOutputModule(step=step, outputModule=outputModule)

        if not outputMod:
            return None

        fileRef = getattr(outputMod.files, fileName, None)
        newFile = File(locations=set())

        # Locations
        newFile.setLocation(getattr(fileRef, "location", None))

        # Runs
        runList = fileRef.runs.listSections_()
        for run in runList:
            lumis = getattr(fileRef.runs, run)
            newRun = Run(int(run), *lumis)
            newFile.addRun(newRun)

        newFile["lfn"] = getattr(fileRef, "lfn", None)
        newFile["pfn"] = getattr(fileRef, "pfn", None)
        newFile["events"] = int(getattr(fileRef, "events", 0))
        newFile["size"] = int(getattr(fileRef, "size", 0))
        newFile["branches"] = getattr(fileRef, "branches", [])
        newFile["input"] = getattr(fileRef, "input", [])
        newFile["inputpfns"] = getattr(fileRef, "inputpfns", [])
        newFile["branch_hash"] = getattr(fileRef, "branch_hash", None)
        newFile["catalog"] = getattr(fileRef, "catalog", "")
        newFile["guid"] = getattr(fileRef, "guid", "")
        newFile["module_label"] = getattr(fileRef, "module_label", "")
        newFile["checksums"] = getattr(fileRef, "checksums", {})
        newFile["merged"] = bool(getattr(fileRef, "merged", False))
        newFile["dataset"] = getattr(fileRef, "dataset", {})
        newFile["acquisitionEra"] = getattr(fileRef, 'acquisitionEra', None)
        newFile["processingVer"] = getattr(fileRef, 'processingVer', None)
        newFile["validStatus"] = getattr(fileRef, 'validStatus', None)
        newFile["globalTag"] = getattr(fileRef, 'globalTag', None)
        newFile["prep_id"] = getattr(fileRef, 'prep_id', None)
        newFile['configURL'] = getattr(fileRef, 'configURL', None)
        newFile['inputPath'] = getattr(fileRef, 'inputPath', None)
        newFile["outputModule"] = outputModule
        newFile["fileRef"] = fileRef

        return newFile
Example #30
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = File(lfn='%s_%i' % (baseName, i), size=1000, events=100)
            lumis = []
            for lumi in range(lumisPerFile):
                lumis.append((i * 100) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.setLocation('blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn='%s_%i_2' % (baseName, i),
                               size=1000,
                               events=100)
                lumis = []
                for lumi in range(lumisPerFile):
                    lumis.append(5 + 10 * (i * 100) +
                                 lumi)  #lumis should be different
                newFile.addRun(Run(i, *lumis))
                newFile.setLocation('malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")

        return testSubscription
Example #31
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print("injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFileArray(path = datasetPath, retriveList = ["retrive_lumi", "retrive_run"])
    # Limiter on number of files
    dbsResults = dbsResults[0:20]
    print("  found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                      locations = "cmssrm.fnal.gov", merged = True)
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.appendLumi(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                                events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                                locations = "cmssrm.fnal.gov", status = "NOTUPLOADED")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName = "cmsRun", appVer = "Unknown", appFam = "Unknown",
                             psetHash = "Unknown", configContent = "Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example #32
    def testWorkUnitHashAndCompare(self):
        """
        Test that the hash function works and that the comparisons work
        """

        testRunLumi0 = Run(TEST_RUN_NUMBER, TEST_LUMI)
        testWorkUnit0 = WorkUnit(taskID=TEST_TASKID,
                                 lastUnitCount=TEST_LAST_UNIT_COUNT,
                                 fileid=TEST_FILEID,
                                 runLumi=testRunLumi0)
        testRunLumi1 = Run(TEST_RUN_NUMBER, TEST_LUMI)
        testWorkUnit1 = WorkUnit(taskID=TEST_TASKID,
                                 lastUnitCount=TEST_LAST_UNIT_COUNT,
                                 fileid=TEST_FILEID,
                                 runLumi=testRunLumi1)
        testRunLumi2 = Run(TEST_RUN_NUMBER, TEST_LUMI + 1)
        testWorkUnit2 = WorkUnit(taskID=TEST_TASKID,
                                 lastUnitCount=TEST_LAST_UNIT_COUNT,
                                 fileid=TEST_FILEID,
                                 runLumi=testRunLumi2)
        testRunLumi3 = Run(TEST_RUN_NUMBER + 1, TEST_LUMI)
        testWorkUnit3 = WorkUnit(taskID=TEST_TASKID,
                                 lastUnitCount=TEST_LAST_UNIT_COUNT,
                                 fileid=TEST_FILEID,
                                 runLumi=testRunLumi3)

        # Tests for hashers
        self.assertEqual(hash(testWorkUnit0), hash(testWorkUnit1))
        self.assertNotEqual(hash(testWorkUnit1), hash(testWorkUnit2))
        self.assertNotEqual(hash(testWorkUnit1), hash(testWorkUnit3))

        # Tests for comparisons
        self.assertEqual(testWorkUnit0, testWorkUnit1)
        self.assertLess(testWorkUnit1, testWorkUnit2)
        self.assertLess(testWorkUnit2, testWorkUnit3)

        return
Example #33
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print("injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFileArray(path = datasetPath, retriveList = ["retrive_lumi", "retrive_run"])
    dbsResults = dbsResults[0:10]
    print("  found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                      locations = "cmssrm.fnal.gov", merged = True)
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.appendLumi(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                                events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                                locations = "cmssrm.fnal.gov", status = "LOCAL")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName = "cmsRun", appVer = "Unknown", appFam = "Unknown",
                             psetHash = "Unknown", configContent = "Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example #34
    def getChunkFiles(self,
                      collectionName,
                      filesetName,
                      chunkOffset,
                      chunkSize=100,
                      user="******",
                      group="cmsdataops"):
        """
        _getChunkFiles_

        Retrieve a chunk of files from the given collection and task.
        """
        chunkFiles = []
        result = self.couchdb.loadView(
            "ACDC", "owner_coll_fileset_files", {
                "startkey": [group, user, collectionName, filesetName],
                "endkey": [group, user, collectionName, filesetName, {}],
                "limit": chunkSize,
                "skip": chunkOffset,
            }, [])

        for row in result["rows"]:
            resultRow = row['value']
            newFile = File(lfn=resultRow["lfn"],
                           size=resultRow["size"],
                           events=resultRow["events"],
                           parents=set(resultRow["parents"]),
                           locations=set(resultRow["locations"]),
                           merged=resultRow["merged"])
            for run in resultRow["runs"]:
                newRun = Run(run["run_number"])
                newRun.extend(run["lumis"])
                newFile.addRun(newRun)

            chunkFiles.append(newFile)

        return chunkFiles
Example #35
File: Harvest_t.py Project: vytjan/WMCore
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

        self.fileset1 = Fileset(name="TestFileset1")
        for fileNum in range(11):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Example #36
    def load(self, parentage=0):
        """
        _load_

        Load the file and all its metadata from the database.  Either the LFN
        or the file's ID must be specified before this is called.
        """
        existingTransaction = self.beginTransaction()

        if self["id"] != -1:
            action = self.daofactory(classname="DBSBufferFiles.GetByID")
            result = action.execute(self["id"], conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="DBSBufferFiles.GetByLFN")
            result = action.execute(self["lfn"], conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.update(result)

        action = self.daofactory(classname='DBSBufferFiles.GetChecksum')
        result = action.execute(fileid=self['id'], conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        self["checksums"] = result

        action = self.daofactory(classname="DBSBufferFiles.GetRunLumiFile")
        runs = action.execute(self["lfn"], conn=self.getDBConn(),
                              transaction=self.existingTransaction())
        for r in runs:
            self.addRun(run=Run(r, *runs[r]))

        action = self.daofactory(classname="DBSBufferFiles.GetLocation")
        self["locations"] = action.execute(self["lfn"], conn=self.getDBConn(),
                                           transaction=self.existingTransaction())

        self["newlocations"].clear()
        self["parents"].clear()

        if parentage > 0:
            action = self.daofactory(classname="DBSBufferFiles.GetParents")
            lfns = action.execute(self["lfn"], conn=self.getDBConn(),
                                  transaction=self.existingTransaction())
            for lfn in lfns:
                parentFile = DBSBufferFile(lfn=lfn)
                parentFile.load(parentage=parentage - 1)
                self["parents"].add(parentFile)

        self.commitTransaction(existingTransaction)
        return
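The parentage argument acts as a recursion budget: each nested load() call decrements it by one. A hypothetical standalone illustration of that counting scheme (the tree and helper here are made up, not WMCore code):

# Each recursive call decrements the budget, so parentage=2 loads
# parents and grandparents, then stops.
def loadWithParentage(lfn, parentage, parentsOf):
    print("loading", lfn)
    if parentage > 0:
        for parentLFN in parentsOf.get(lfn, []):
            loadWithParentage(parentLFN, parentage - 1, parentsOf)

tree = {"child": ["parent"], "parent": ["grandparent"]}
loadWithParentage("child", 2, tree)  # loads child, parent, grandparent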
Example #37
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (
        datasetPath, inputFileset.name)
    args = {}
    args[
        "url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path=datasetPath,
                                  retriveList=["retrive_lumi", "retrive_run"])
    # Limiter on number of files
    dbsResults = dbsResults[0:20]
    print "  found %d files, inserting into wmbs..." % (len(dbsResults))

    for dbsResult in dbsResults:
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn=dbsResult["LogicalFileName"],
                                size=dbsResult["FileSize"],
                                events=dbsResult["NumberOfEvents"],
                                checksums={"cksum": dbsResult["Checksum"]},
                                locations="cmssrm.fnal.gov",
                                status="NOTUPLOADED")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName="cmsRun",
                             appVer="Unknown",
                             appFam="Unknown",
                             psetHash="Unknown",
                             configContent="Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
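
On the DBS side the same pattern appears in reverse: one Run object per file, filled from the LumiList entries. A minimal sketch with a hand-built dict standing in for a dbsApi.listFiles() row (field names follow the loop above). Note that, as written, the run number is taken from the first LumiList entry, so a file spanning several runs would be collapsed into one:

from WMCore.DataStructs.Run import Run

# Hypothetical row shaped like a DBS listFiles() result
dbsResult = {"LumiList": [{"RunNumber": 143316, "LumiSectionNumber": 1},
                          {"RunNumber": 143316, "LumiSectionNumber": 2}]}

myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
for lumi in dbsResult["LumiList"]:
    myRun.lumis.append(lumi["LumiSectionNumber"])
assert myRun.run == 143316 and myRun.lumis == [1, 2]
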
Example #38
File: WorkUnit_t.py  Project: vytjan/WMCore
    def testGetInfo(self):
        testRunLumi = Run(TEST_RUN_NUMBER, TEST_LUMI)
        testWorkUnit = WorkUnit(taskID=TEST_TASKID,
                                lastUnitCount=TEST_LAST_UNIT_COUNT,
                                fileid=TEST_FILEID,
                                runLumi=testRunLumi)

        info = testWorkUnit.getInfo()

        # Test the things we set
        self.assertEqual(info[0], TEST_TASKID)
        self.assertEqual(info[2], TEST_LAST_UNIT_COUNT)
        self.assertEqual(info[7], TEST_FILEID)
        self.assertEqual(info[8].run, TEST_RUN_NUMBER)
        self.assertItemsEqual(info[8].lumis, [TEST_LUMI])

        # Test the defaults we did not set
        self.assertEqual(info[1], 0)
        self.assertGreater(info[3], 0)
        self.assertEqual(info[4], 0)
        self.assertGreaterEqual(info[5], 0)
        self.assertGreaterEqual(info[6], 0)

        # Run another test by overriding defaults of things with default values
        testRetries = 10
        testSubmitTime = 10 * 365 * 24 * 3600
        testStatus = 4
        testFirstEvent = 100
        testLastEvent = 600

        testWorkUnit = WorkUnit(taskID=TEST_TASKID,
                                lastUnitCount=TEST_LAST_UNIT_COUNT,
                                fileid=TEST_FILEID,
                                runLumi=testRunLumi,
                                retryCount=testRetries,
                                lastSubmitTime=testSubmitTime,
                                status=testStatus,
                                firstEvent=testFirstEvent,
                                lastEvent=testLastEvent)
        info = testWorkUnit.getInfo()

        # Test the defaults we overrode
        self.assertEqual(info[1], testRetries)
        self.assertEqual(info[3], testSubmitTime)
        self.assertEqual(info[4], testStatus)
        self.assertEqual(info[5], testFirstEvent)
        self.assertEqual(info[6], testLastEvent)

        return
Example #39
    def stuffACDCDatabase(self, numFiles=50, lumisPerFile=20, lumisPerACDCRecord=2):
        """
        _stuffACDCDatabase_

        Fill the ACDC database with ACDC records, both for the processing
        fileset and for the merge fileset.
        """
        filesetName = '/%s/DataProcessing' % self.workflowName

        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '/store/data/a/%d' % i
                acdcFile = File(lfn=lfn, size=100, events=250, locations=self.validLocations, merged=1)
                run = Run(i + 1, *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {'collection_name': self.workflowName,
                           'collection_type': 'ACDC.CollectionTypes.DataCollection',
                           'files': {lfn: acdcFile},
                           'fileset_name': filesetName}
                self.acdcDB.queue(acdcDoc)

        filesetName = '/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName
        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '/store/unmerged/b/%d' % i
                acdcFile = File(lfn=lfn, size=100, events=250, locations=set([choice(self.validLocations)]), merged=0)
                run = Run(i + 1, *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {'collection_name': self.workflowName,
                           'collection_type': 'ACDC.CollectionTypes.DataCollection',
                           'files': {lfn: acdcFile},
                           'fileset_name': filesetName}
                self.acdcDB.queue(acdcDoc)
        self.acdcDB.commit()

        return
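
With the default arguments (lumisPerFile=20, lumisPerACDCRecord=2), the inner loop above slices each file's lumi range into ten two-lumi ACDC records. A quick check of the striding arithmetic:

lumisPerFile, lumisPerACDCRecord = 20, 2
records = [list(range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
           for j in range(1, lumisPerFile + 1, lumisPerACDCRecord)]
assert len(records) == lumisPerFile // lumisPerACDCRecord   # 10 records per file
assert records[0] == [1, 2] and records[-1] == [19, 20]
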
Example #40
    def populateFilesFromWMBS(self, filesByLocation):
        """
        Load the lumi information for files from WMBS

        Args:
            filesByLocation: the files at the location currently under consideration

        Returns: nothing
        """

        fileLumis = self.loadRunLumi.execute(files=filesByLocation)
        for f in filesByLocation:
            lumiDict = fileLumis.get(f['id'], {})
            for run in lumiDict.keys():
                f.addRun(run=Run(run, *lumiDict[run]))
Example #41
    def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100,
                           firstLumi=1, lastLumi=10, existingSub=None):
        # MC comes with only one MCFakeFile
        newFile = File("MCFakeFileTest", size=1000, events=numEvents)
        newFile.setLocation('se01')
        if firstLumi == lastLumi:
            newFile.addRun(Run(1, *list(range(firstLumi, lastLumi + 1))))
        else:
            newFile.addRun(Run(1, *list(range(firstLumi, lastLumi))))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent

        if existingSub is None:
            singleMCFileset = Fileset(name="MCTestFileset")
            singleMCFileset.addFile(newFile)
            testWorkflow = Workflow()
            existingSub = Subscription(fileset=singleMCFileset,
                                       workflow=testWorkflow,
                                       split_algo="EventBased",
                                       type="Production")
        else:
            existingSub['fileset'].addFile(newFile)

        return existingSub
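
A hypothetical usage inside a test case that defines this helper: build the MCFakeFile subscription once, then pass it back in via existingSub so a continuation file lands in the same fileset (argument values here are made up):

# First call builds the fileset, workflow and subscription
sub = self.generateFakeMCFile(numEvents=1000, firstEvent=1, lastEvent=1000,
                              firstLumi=1, lastLumi=10)
# Second call reuses the subscription and only adds another MCFakeFile
sub = self.generateFakeMCFile(numEvents=1000, firstEvent=1001, lastEvent=2000,
                              firstLumi=11, lastLumi=20, existingSub=sub)
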
Example #42
    def createFile(lfn, events, run, lumis, location, lumiMultiplier=None):
        """
        _createFile_

        Create a file for testing
        """
        if lumiMultiplier is None:
            lumiMultiplier = run

        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((lumiMultiplier * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
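
Because lumiMultiplier defaults to the run number, files built for different runs come out with disjoint lumi ranges. A hypothetical usage (the LFNs and location name are made up):

fileA = createFile("/store/data/test/1", events=100, run=1, lumis=5,
                   location="T1_US_FNAL_Disk")  # carries lumis 5-9
fileB = createFile("/store/data/test/2", events=100, run=2, lumis=5,
                   location="T1_US_FNAL_Disk")  # carries lumis 10-14
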
Example #43
    def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                           lastEvent = 100, firstLumi = 1, lastLumi = 10):
        # MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCTestFileset")
        newFile = File("MCFakeFileTest", size = 1000, events = numEvents)
        newFile.setLocation('se01')
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        testWorkflow = Workflow()
        singleMCFileset.addFile(newFile)
        singleMCFileSubscription = Subscription(fileset = singleMCFileset,
                                                workflow = testWorkflow,
                                                split_algo = "EventBased",
                                                type = "Production")
        return singleMCFileSubscription
Example #44
    def createParentFiles(self,
                          acqEra,
                          nFiles=10,
                          workflowName='TestWorkload',
                          taskPath='/TestWorkload/DataTest'):
        """
        _createParentFiles_

        Create several parentless files in DBSBuffer.  This simulates raw files
        in the T0.
        """
        workflowId = self.injectWorkflow(workflowName=workflowName,
                                         taskPath=taskPath)
        parentlessFiles = []

        baseLFN = "/store/data/%s/Cosmics/RAW/v1/000/143/316/" % (acqEra)
        for i in range(nFiles):
            testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root",
                                     size=1024,
                                     events=20,
                                     checksums={"cksum": 1},
                                     workflowId=workflowId)
            testFile.setAlgorithm(appName="cmsRun",
                                  appVer="CMSSW_3_1_1",
                                  appFam="RAW",
                                  psetHash="GIBBERISH",
                                  configContent="MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RAW" % (acqEra))

            testFile['block_close_max_wait_time'] = 1000000
            testFile['block_close_max_events'] = 1000000
            testFile['block_close_max_size'] = 1000000
            testFile['block_close_max_files'] = 1000000

            lumis = []
            for j in range(10):
                lumis.append((i * 10) + j)
            testFile.addRun(Run(143316, *lumis))

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            parentlessFiles.append(testFile)

        return parentlessFiles
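
The lumi bookkeeping above gives each parent file its own ten-lumi block of run 143316, so the default ten files cover lumis 0-99 with no overlap. A quick check of that arithmetic:

nFiles = 10
lumiBlocks = [[(i * 10) + j for j in range(10)] for i in range(nFiles)]
assert lumiBlocks[0] == list(range(0, 10))     # first file: lumis 0-9
assert lumiBlocks[-1] == list(range(90, 100))  # last file: lumis 90-99
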
Example #45
    def execute(self, *args, **kwargs):
        self.logger.info(
            "Data discovery and splitting for %s using user-provided files" %
            kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_user_files']
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            raise TaskWorkerException(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            with self.config.TaskWorker.envForCMSWEB:
                configDict = {
                    "cacheduration": 1,
                    "pycurl": True
                }  # cache duration is in hours
                resourceCatalog = CRIC(logger=self.logger,
                                       configDict=configDict)
                locations = resourceCatalog.getAllPSNs()

        userFileset = Fileset(name=kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." %
                         len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for idx, userfile in enumerate(userfiles):
            newFile = File(userfile, size=1000, events=1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task=kwargs['task'], result=userFileset)
Example #46
    def populateACDCCouch(self,
                          numFiles=2,
                          lumisPerJob=35,
                          eventsPerJob=20000):
        """
        _populateACDCCouch_

        Create production files in couchDB to test the creation
        of ACDC jobs for the EventBased algorithm
        """
        # Define some constants
        workflowName = "ACDC_TestEventBased"
        filesetName = "/%s/Production" % workflowName
        owner = "*****@*****.**"
        group = "unknown"

        lumisPerFile = lumisPerJob * 250
        for i in range(numFiles):
            for j in range(250):
                lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5)
                acdcFile = File(lfn=lfn,
                                size=100,
                                events=eventsPerJob,
                                locations=self.validLocations,
                                merged=False,
                                first_event=1)
                run = Run(
                    1,
                    *range(1 + (i * lumisPerFile) + j * lumisPerJob,
                           (j + 1) * lumisPerJob + (i * lumisPerFile) + 2))
                acdcFile.addRun(run)
                acdcDoc = {
                    "collection_name": workflowName,
                    "collection_type": "ACDC.CollectionTypes.DataCollection",
                    "files": {
                        lfn: acdcFile
                    },
                    "fileset_name": filesetName,
                    "owner": {
                        "user": owner,
                        "group": group
                    }
                }
                self.couchDB.queue(acdcDoc)

        self.couchDB.commit()
        return
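
The run construction above deserves a closer look: for file i and record j the lumis run from 1 + i*lumisPerFile + j*lumisPerJob through (j+1)*lumisPerJob + i*lumisPerFile + 1 inclusive, i.e. lumisPerJob + 1 lumis per record, with the last lumi shared with the following record. A quick check with the defaults:

lumisPerJob = 35
lumisPerFile = lumisPerJob * 250
i, j = 0, 0
lumis = list(range(1 + (i * lumisPerFile) + j * lumisPerJob,
                   (j + 1) * lumisPerJob + (i * lumisPerFile) + 2))
assert len(lumis) == lumisPerJob + 1   # 36 lumis per record, not 35
assert lumis[-1] == 36                 # record j = 1 starts at lumi 36
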
Example #47
    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(lumi * 2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #48
#!/usr/bin/env python

import random

from WMCore.FwkJobReport import Report
from WMCore.DataStructs.Run import Run
from WMCore.Services.UUIDLib import makeUUID

outputModules = ["write_A_Calo_RAW", "write_A_Cosmics_RAW",
                 "write_A_HcalHPDNoise_RAW", "write_A_MinimumBias_RAW",
                 "write_A_RandomTriggers_RAW", "write_A_Calibration_TestEnables_RAW",
                 "write_HLTDEBUG_Monitor_RAW"]

runInfo = Run(1)
runInfo.extendLumis([11, 12, 13, 14, 15])

for i in range(100):
    loadTestReport = Report.Report("cmsRun1")
    loadTestReport.addInputSource("PoolSource")
    inputFile = loadTestReport.addInputFile("PoolSource", lfn = makeUUID(),
                                            events = 600000, size = 600000)
    Report.addRunInfoToFile(inputFile, runInfo)

    for outputModule in outputModules:
        loadTestReport.addOutputModule(outputModule)
        datasetInfo = {"applicationName": "cmsRun", "applicationVersion": "CMSSW_3_3_5_patch3",
                       "primaryDataset": outputModule, "dataTier": "RAW",
                       "processedDataset": "LoadTest10"}
        fileAttrs = {"lfn": makeUUID(), "location": "cmssrm.fnal.gov",
                     "checksums": {"adler32": "ff810ec3", "cksum": "2212831827"},
                     "events": random.randrange(500, 5000, 50),
Example #49
taskMaker = TaskMaker(workload, os.path.join(os.getcwd(), workloadName))
taskMaker.skipSubscription = True
taskMaker.processWorkload()

workload.save(workloadPath)

myThread = threading.currentThread()
myThread.transaction.begin()
for workloadTask in workload.taskIterator():
    inputFileset = Fileset(name = workloadTask.getPathName())
    inputFileset.create()

    virtualFile = File(lfn = "%s-virtual-input" % workloadTask.getPathName(),
                       size = 0, events = numEvents,
                       locations = set(["cmssrm.fnal.gov", "storm-fe-cms.cr.cnaf.infn.it",
                                        "cmssrm-fzk.gridka.de", "srm2.grid.sinica.edu.tw",
                                        "srm-cms.gridpp.rl.ac.uk", "ccsrm.in2p3.fr",
                                        "srmcms.pic.es"]), merged = False)

    myRun = Run(runNumber = 1)
    myRun.appendLumi(1)
    virtualFile.addRun(myRun)
    virtualFile.create()
    inputFileset.addFile(virtualFile)
    inputFileset.commit()

    myWMBSHelper = WMBSHelper(workload)
    myWMBSHelper._createSubscriptionsInWMBS(workloadTask.getPathName())

myThread.transaction.commit()
Example #50
#!/usr/bin/env python

import random

from WMCore.FwkJobReport import Report
from WMCore.DataStructs.Run import Run
from WMCore.Services.UUIDLib import makeUUID

outputModules = ["outputModule1", "outputModule2", "outputModule3",
                 "outputModule4", "outputModule5", "outputModule6",
                 "outputModule7", "outputModule8", "outputModule9",
                 "outputModule10"]

runInfo = Run(1)
runInfo.extendLumis([11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                     25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
                     39, 40])

totalReports = 25
inputFilesPerReport = 50

inputFileCounter = 0
for i in range(totalReports):
    loadTestReport = Report.Report("cmsRun1")
    loadTestReport.addInputSource("PoolSource")

    for j in range(inputFilesPerReport):
        inputFile = loadTestReport.addInputFile("PoolSource", lfn = "input%i" % inputFileCounter,
                                                events = 600000, size = 600000)
        inputFileCounter += 1
Example #51
# Have to transform this because JSON is too stupid to understand ints
# Also for some reason we're getting a strange problem where the mask
# isn't being loaded at all.  I'm not sure what to do there except drop it.
try:
    # Copy the key list up front, since the loop mutates the dict
    for key in list(maskA['runAndLumis'].keys()):
        maskA['runAndLumis'][int(key)] = maskA['runAndLumis'][key]
        del maskA['runAndLumis'][key]
except KeyError:
    # We don't have a mask.  Not much we can do about this
    maskA = Mask()
mask = Mask()
mask.update(maskA)
runs = []
# Turn the arbitrary format into real Run objects
for r in runsA:
    run = Run(runNumber = r['run_number'])
    run.lumis = r.get('lumis', [])
    runs.append(run)
# Get rid of runs that aren't in the mask
runs = mask.filterRunLumisByMask(runs = runs)
for err in errorCouch:
    task = err['value']['task']
    step = err['value']['step']
    errors = err['value']['error']
    logs = err['value']['logs']
    start = err['value']['start']
    stop = err['value']['stop']
    if task not in workflowData['errors']:
        workflowData['errors'][task] = {'failureTime': 0}
    if step not in workflowData['errors'][task]:
        workflowData['errors'][task][step] = {}
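
A minimal sketch of the filterRunLumisByMask step above, assuming WMCore's DataStructs Mask and Run; addRunAndLumis is assumed here as the way to populate the mask, and the run/lumi values are made up:

from WMCore.DataStructs.Mask import Mask
from WMCore.DataStructs.Run import Run

mask = Mask()
mask.addRunAndLumis(run=1, lumis=[2, 3])   # assumed helper: mask covers run 1, lumis 2-3

runs = [Run(1, 1, 2, 3, 4), Run(2, 1, 2)]  # arbitrary input runs
filtered = mask.filterRunLumisByMask(runs=runs)
# Expectation: only run 1 survives, restricted to the masked lumis
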
Example #52
    def execute(self, jobID, conn = None, transaction = False):
        """
        _execute_

        Execute the SQL for the given job ID(s) and then format and return
        the result.
        """

        if type(jobID) != list:
            jobID = [jobID]

        binds = [{"jobid": x} for x in jobID]

        if not binds:
            return []

        #First load full file information with run/lumis
        filesResult = self.dbi.processData(self.fileSQL, binds, conn = conn,
                                           transaction = transaction)
        fileList = self.formatDict(filesResult)

        #Clear duplicates
        bindDict = {}
        for result in fileList:
            bindDict[result['id']] = 1
            result['newRuns'] = []
        fileBinds = [{'fileid' : x} for x in bindDict.keys()]

        # Load run/lumi information for those files
        if len(fileBinds):
            lumiResult = self.dbi.processData(self.runLumiSQL, fileBinds, conn = conn,
                                              transaction = transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if not l['fileid'] in lumiDict.keys():
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict.keys():
                    for l in lumiDict[f['id']]:
                        run = l['run']
                        lumi = l['lumi']
                        try:
                            fileRuns[run].append(lumi)
                        except KeyError:
                            fileRuns[run] = []
                            fileRuns[run].append(lumi)

                for r in fileRuns.keys():
                    newRun = Run(runNumber = r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if not jobid in filesForJobs.keys():
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid].keys():
                wmbsFile = File(id = f['id'])
                wmbsFile.update(f)
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                filesForJobs[jobid][f['id']] = wmbsFile


        #Add the file information to job objects and load the masks
        jobList = [Job(id = x) for x in jobID]
        for j in jobList:
            if j['id'] in filesForJobs.keys():
                j['input_files'] = filesForJobs[j['id']].values()
            j['mask'].load(j['id'])

        return jobList
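
The try/except KeyError grouping used here (and in the next example) is equivalent to a defaultdict; a behavior-identical sketch for a single file id, with made-up rows shaped like the formatted runLumiSQL output:

from collections import defaultdict

from WMCore.DataStructs.Run import Run

# Rows as formatted from runLumiSQL: one dict per (file, run, lumi)
lumiList = [{'fileid': 1, 'run': 1, 'lumi': 10},
            {'fileid': 1, 'run': 1, 'lumi': 11},
            {'fileid': 1, 'run': 2, 'lumi': 5}]

fileRuns = defaultdict(list)
for l in lumiList:
    fileRuns[l['run']].append(l['lumi'])

newRuns = []
for r in fileRuns:
    newRun = Run(runNumber = r)
    newRun.lumis = fileRuns[r]
    newRuns.append(newRun)
assert sorted(run.run for run in newRuns) == [1, 2]
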
Example #53
    def execute(self, jobID, fileSelection = None,
                conn = None, transaction = False):
        """
        _execute_

        Execute the SQL for the given job ID and then format and return
        the result.
        """

        if type(jobID) == list:
            if len(jobID) < 1:
                # Nothing to do
                return []
            binds = jobID
        else:
            binds = {"jobid": jobID}

        result = self.dbi.processData(self.sql, binds, conn = conn,
                                      transaction = transaction)

        jobList = self.formatJobs(result)

        filesResult = self.dbi.processData(self.fileSQL, binds, conn = conn,
                                           transaction = transaction)
        fileList  = self.formatDict(filesResult)
        fileBinds = []
        if fileSelection:
            fileList = [x for x in fileList if x['lfn'] in fileSelection[x['jobid']]]
        for x in fileList:
            # Add new runs
            x['newRuns'] = []
            # Assemble unique list of binds
            if not {'fileid': x['id']} in fileBinds:
                fileBinds.append({'fileid': x['id']})

        parentList = []
        if len(fileBinds) > 0:
            parentResult = self.dbi.processData(self.parentSQL, fileBinds, conn = conn,
                                                transaction = transaction)
            parentList   = self.formatDict(parentResult)

            lumiResult = self.dbi.processData(self.runLumiSQL, fileBinds, conn = conn,
                                              transaction = transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if not l['fileid'] in lumiDict.keys():
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict.keys():
                    for l in lumiDict[f['id']]:
                        run  = l['run']
                        lumi = l['lumi']
                        try:
                            fileRuns[run].append(lumi)
                        except KeyError:
                            fileRuns[run] = []
                            fileRuns[run].append(lumi)

                for r in fileRuns.keys():
                    newRun = Run(runNumber = r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if not jobid in filesForJobs.keys():
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid].keys():
                wmbsFile = File(id = f['id'])
                wmbsFile.update(f)
                wmbsFile['locations'].add(f['se_name'])
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                for entry in parentList:
                    if entry['id'] == f['id']:
                        wmbsFile['parents'].add(entry['lfn'])
                filesForJobs[jobid][f['id']] = wmbsFile
            else:
                # If the file is there, just add the location
                filesForJobs[jobid][f['id']]['locations'].add(f['se_name'])

        for j in jobList:
            if j['id'] in filesForJobs.keys():
                j['input_files'] = filesForJobs[j['id']].values()

        return jobList
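
The dedup logic above keeps a single File per (job id, file id) and folds each additional row into the existing object's location set; the same fold with plain dicts and made-up rows:

filesForJobs = {}
rows = [{'jobid': 1, 'id': 7, 'lfn': '/store/foo', 'se_name': 'siteA'},
        {'jobid': 1, 'id': 7, 'lfn': '/store/foo', 'se_name': 'siteB'}]
for f in rows:
    perJob = filesForJobs.setdefault(f['jobid'], {})
    if f['id'] not in perJob:
        # First time we see this file for this job: create it
        perJob[f['id']] = {'lfn': f['lfn'], 'locations': set()}
    # Every row contributes its location, duplicates collapse in the set
    perJob[f['id']]['locations'].add(f['se_name'])

assert filesForJobs[1][7]['locations'] == set(['siteA', 'siteB'])
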