Python DBSWriterObjects.getDBSFileBlock Examples

Programming Language: Python

Namespace/Package Name: WMCore.Services.DBS

Class/Type: DBSWriterObjects

Method/Function: getDBSFileBlock

Examples at hotexamples.com: 8

Python DBSWriterObjects.getDBSFileBlock - 8 examples found. These are the top rated real world Python examples of WMCore.Services.DBS.DBSWriterObjects.getDBSFileBlock extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getDBSFileBlock(4)

createAlgorithm(2)

createAlgorithmForInsert(2)

createDBSFileBlock(2)

createDBSFiles(2)

createPrimaryDataset(2)

createProcessedDataset(2)

makeTierList(2)

createMergeAlgorithm(1)

Example #1

Show file

File: DBSWriter.py Project: PerilousApricot/CRAB2

    def insertFilesForDBSBuffer(
        self,
        files,
        procDataset,
        algos,
        jobType="NotMerge",
        insertDetectorData=False,
        maxFiles=100,
        maxSize=99999999,
        timeOut=None,
        fileCommitLength=5,
    ):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        # TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        # Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos]

        # print ialgos

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            # //
            lumiList = []

            # Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                # Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(lrun)  # save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                # Checksum = str(outFile['cksum']),
                NumberOfEvents=outFile["events"],
                LogicalFileName=outFile["lfn"],
                FileSize=int(outFile["size"]),
                Status="VALID",
                ValidationStatus="VALID",
                FileType="EDM",
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(procDataset["Path"].split("/")[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
                # BranchHash = outFile['BranchHash'],
            )
            # Set checksums by hand
            # dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile["checksums"].keys():
                # This should be a dictionary with a cktype key and cksum value
                if entry.lower() == "cksum":
                    dbsfile["Checksum"] = str(outFile["checksums"][entry])
                elif entry.lower() == "adler32":
                    dbsfile["Adler32"] = str(outFile["checksums"][entry])
                elif entry.lower() == "md5":
                    dbsfile["Md5"] = str(outFile["checksums"][entry])

            # This check comes from ProdAgent, not sure if its required
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                # print "FAKING seName for now"
                # seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        # //  fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []
        # First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(self.dbs, procDataset, seName)
            fileBlock["files"] = []
            # if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException, ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset["Path"]
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

Example #2

Show file

    def insertFiles(self, fwkJobRep, insertDetectorData=False):
        """
        _insertFiles_

        Process the files in the FwkJobReport instance and insert
        them into the associated datasets

        A list of affected fileblock names is returned both for merged
        and unmerged fileblocks. Only merged blocks will have to be managed.
        #for merged file
        #blocks to facilitate management of those blocks.
        #This list is not populated for processing jobs since we dont really
        #care about the processing job blocks.

        """

        insertLists = {}
        orderedHashes = []
        affectedBlocks = set()

        if len(fwkJobRep.files) <= 0:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "No files found in FrameWorkJobReport for:\n"
            msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
            msg += " Workflow: %s" % fwkJobRep.workflowSpecId
            raise DBSWriterError(msg)

        for outFile in fwkJobRep.sortFiles():
            #  //
            # // Convert each file into a DBS File object
            #//
            seName = None
            if "SEName" in outFile:
                if outFile['SEName']:
                    seName = outFile['SEName']
                    logging.debug("SEname associated to file is: %s" % seName)


## remove the fallback to site se-name if no SE is associated to File
## because it's likely that there is some stage out problem if there
## is no SEName associated to the file.
#            if not seName:
#                if fwkJobRep.siteDetails.has_key("se-name"):
#                   seName = fwkJobRep.siteDetails['se-name']
#                   seName = str(seName)
#                   logging.debug("site SEname: %s"%seName)
            if not seName:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to files in FrameWorkJobReport for "
                #                msg += "No SEname found in FrameWorkJobReport for "
                msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
                msg += " Workflow: %s" % fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)
            try:
                if (insertDetectorData):
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType, self.dbs)
                else:
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles:\n"
                msg += "Error creating DbsFile instances for file:\n"
                msg += "%s\n" % outFile['LFN']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if len(dbsFiles) <= 0:
                msg = "No DbsFile instances created. Not enough info in the FrameWorkJobReport for"
                msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
                msg += " Workflow: %s" % fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)

            for f in dbsFiles:
                datasetName = makeDBSDSName(f)
                hashName = "%s-%s" % (seName, datasetName)

                if hashName not in insertLists:
                    insertLists[hashName] = _InsertFileList(
                        seName, datasetName)
                insertLists[hashName].append(f)

                if not orderedHashes.count(hashName):
                    orderedHashes.append(hashName)

        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        for hash in orderedHashes:

            fileList = insertLists[hash]
            procDataset = fileList[0]['Dataset']

            try:
                fileBlock = DBSWriterObjects.getDBSFileBlock(
                    self.dbs, procDataset, fileList.seName)

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset
                msg += "In Storage Element:\n %s\n" % fileList.seName
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if fwkJobRep.jobType == "Merge":
                #  //
                # // Merge files
                #//
                for mergedFile in fileList:
                    mergedFile['Block'] = fileBlock
                    affectedBlocks.add(fileBlock['Name'])
                    msg = "calling: self.dbs.insertMergedFile(%s, %s)" % (str(
                        mergedFile['ParentList']), str(mergedFile))
                    logging.debug(msg)
                    try:
                        self.dbs.insertMergedFile(mergedFile['ParentList'],
                                                  mergedFile)

                    except DbsException as ex:
                        msg = "Error in DBSWriter.insertFiles\n"
                        msg += "Cannot insert merged file:\n"
                        msg += "  %s\n" % mergedFile['LogicalFileName']
                        msg += "%s\n" % formatEx(ex)
                        raise DBSWriterError(msg)
                    logging.debug(
                        "Inserted merged file: %s to FileBlock: %s" %
                        (mergedFile['LogicalFileName'], fileBlock['Name']))
            else:
                #  //
                # // Processing files
                #//
                affectedBlocks.add(fileBlock['Name'])
                msg = "calling: self.dbs.insertFiles(%s, %s, %s)" % (
                    str(procDataset), str(list(fileList)), str(fileBlock))
                logging.debug(msg)

                try:
                    self.dbs.insertFiles(procDataset, list(fileList),
                                         fileBlock)
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % ([x['LogicalFileName']
                                       for x in fileList], )

                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
                logging.debug("Inserted files: %s to FileBlock: %s" %
                              (([x['LogicalFileName']
                                 for x in fileList]), fileBlock['Name']))

        return list(affectedBlocks)

Example #3

Show file

File: DBSWriter.py Project: PerilousApricot/CRAB2

            # First see if the block is full
            if self.manageFileBlock(
                fileBlock=fileBlock,
                maxFiles=maxFiles,
                maxSize=maxSize,
                timeOut=timeOut,
                algos=ialgos,
                filesToCommit=filesToCommit,
                procDataset=procDataset,
            ):
                fileBlock["OpenForWriting"] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(self.dbs, procDataset, seName)
                    fileBlock["files"] = []
                except DbsException, ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset["Path"]
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock["files"].append(file["LogicalFileName"])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []

Example #4

Show file

    def insertFilesForDBSBuffer(self,
                                files,
                                procDataset,
                                algos,
                                jobType="NotMerge",
                                insertDetectorData=False,
                                maxFiles=100,
                                maxSize=99999999,
                                timeOut=None,
                                fileCommitLength=5):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        #Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [
            DBSWriterObjects.createAlgorithmForInsert(dict(algo))
            for algo in algos
        ]

        #print ialgos

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(
                        lrun)  #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                #Checksum = str(outFile['cksum']),
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
                #BranchHash = outFile['BranchHash'],
            )
            #Set checksums by hand
            #dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile['checksums'].keys():
                #This should be a dictionary with a cktype key and cksum value
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])

            #This check comes from ProdAgent, not sure if its required
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []
        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs, procDataset, seName)
            fileBlock['files'] = []
            #if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock=fileBlock,
                                    maxFiles=maxFiles,
                                    maxSize=maxSize,
                                    timeOut=timeOut,
                                    algos=ialgos,
                                    filesToCommit=filesToCommit,
                                    procDataset=procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs, procDataset, seName)
                    fileBlock['files'] = []
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    logging.debug("Inserted files: %s to FileBlock: %s" \
                                  % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % (
                        [x['LogicalFileName'] for x in insertFiles], )
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

        if len(filesToCommit) > 0:
            try:
                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                filesToCommit = []
                logging.debug("Inserted files: %s to FileBlock: %s" \
                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % ([x['LogicalFileName']
                                   for x in insertFiles], )
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        ## Do bulk inserts now for DBS
        #filesToCommit = []
        #count         = 0
        #count2        = 0
        #for file in insertFiles:
        #    count += 1
        #    #Try and close the box
        #    logging.error("Should have a file")
        #    logging.error(len(filesToCommit))
        #    count2 += len(filesToCommit)
        #    if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
        #                            maxSize = maxSize, timeOut = timeOut, algos = ialgos,
        #                            filesToCommit = filesToCommit, procDataset = procDataset):
        #        fileBlock['OpenForWriting'] = '0'
        #        if not fileBlock in affectedBlocks:
        #            affectedBlocks.append(fileBlock)
        #
        #
        #
        #        # Then we need a new block
        #        try:
        #            fileBlock = DBSWriterObjects.getDBSFileBlock(
        #                self.dbs,
        #                procDataset,
        #                seName)
        #            fileBlock['files'] = []
        #        except DbsException, ex:
        #            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
        #            msg += "Cannot retrieve FileBlock for dataset:\n"
        #            msg += " %s\n" % procDataset['Path']
        #            msg += "%s\n" % formatEx(ex)
        #            raise DBSWriterError(msg)
        #    #At this point, we should commit the block as is
        #    fileBlock['files'].append(file['LogicalFileName'])
        #    if jobType == "MergeSpecial":
        #        for file in fileList:
        #            file['Block'] = fileBlock
        #            msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file))
        #            logging.debug(msg)
        #            try:
        #                #
        #                #
        #                # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well)
        #                self.dbs.insertMergedFile(file['ParentList'],
        #                                          file)
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert merged file:\n"
        #                msg += "  %s\n" % file['LogicalFileName']
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #            logging.debug("Inserted merged file: %s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name']))
        #    else:
        #        filesToCommit.append(file)
        #        if len(filesToCommit) >= fileCommitLength:
        #            # Only commit the files if there are more of them then the maximum length
        #            try:
        #                logging.error("About to commit %i files" %(len(filesToCommit)))
        #                count2 += len(filesToCommit)
        #                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #                filesToCommit = []
        #                logging.debug("Inserted files: %s to FileBlock: %s" \
        #                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert processed files:\n"
        #                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #
        #
        #
        #
        ## If we still have files to commit, commit them
        #logging.error("Got to the end of the loop")
        #logging.error(len(filesToCommit))
        #logging.error(count2)
        #if len(filesToCommit) > 0:
        #    try:
        #        logging.error("About to insert some files")
        #        self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #        filesToCommit = []
        #        logging.debug("Inserted files: %s to FileBlock: %s" \
        #                      % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #    except DbsException, ex:
        #        msg = "Error in DBSWriter.insertFiles\n"
        #        msg += "Cannot insert processed files:\n"
        #        msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #        msg += "%s\n" % formatEx(ex)
        #        raise DBSWriterError(msg)

        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)

Example #5

Show file

File: DBSWriter.py Project: dmwm/WMCore-legacy

    def insertFilesForDBSBuffer(self,
                                files,
                                procDataset,
                                algos,
                                jobType="NotMerge",
                                insertDetectorData=False,
                                maxFiles=100,
                                maxSize=99999999,
                                timeOut=None,
                                fileCommitLength=5):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        #Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [
            DBSWriterObjects.createAlgorithmForInsert(dict(algo))
            for algo in algos
        ]

        #print ialgos

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(
                        lrun)  #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                #Checksum = str(outFile['cksum']),
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
                #BranchHash = outFile['BranchHash'],
            )
            #Set checksums by hand
            #dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile['checksums'].keys():
                #This should be a dictionary with a cktype key and cksum value
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])

            #This check comes from ProdAgent, not sure if its required
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []
        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs, procDataset, seName)
            fileBlock['files'] = []
            #if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException, ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

Example #6

Show file

File: DBSWriter.py Project: dmwm/WMCore-legacy

        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock=fileBlock,
                                    maxFiles=maxFiles,
                                    maxSize=maxSize,
                                    timeOut=timeOut,
                                    algos=ialgos,
                                    filesToCommit=filesToCommit,
                                    procDataset=procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs, procDataset, seName)
                    fileBlock['files'] = []
                except DbsException, ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []

Example #7

Show file

File: DBSWriter.py Project: HassenRiahi/WMCore

    def insertFiles(self, fwkJobRep, insertDetectorData = False):
        """
        _insertFiles_

        Process the files in the FwkJobReport instance and insert
        them into the associated datasets

        A list of affected fileblock names is returned both for merged
        and unmerged fileblocks. Only merged blocks will have to be managed.
        #for merged file
        #blocks to facilitate management of those blocks.
        #This list is not populated for processing jobs since we dont really
        #care about the processing job blocks.

        """

        insertLists = {}
        orderedHashes = []
        affectedBlocks = set()

        if len(fwkJobRep.files)<=0:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "No files found in FrameWorkJobReport for:\n"
            msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId
            msg += " Workflow: %s"%fwkJobRep.workflowSpecId
            raise DBSWriterError(msg)


        for outFile in fwkJobRep.sortFiles():
            #  //
            # // Convert each file into a DBS File object
            #//
            pnn = None
            if "PNN" in outFile:
                if outFile['PNN'] :
                    pnn = outFile['PNN']
                    logging.debug("PNN associated to file is: %s"%pnn)
## remove the fallback to site se-name if no SE is associated to File
## because it's likely that there is some stage out problem if there
## is no SEName associated to the file.
#            if not seName:
#                if fwkJobRep.siteDetails.has_key("se-name"):
#                   seName = fwkJobRep.siteDetails['se-name']
#                   seName = str(seName)
#                   logging.debug("site SEname: %s"%seName)
            if not pnn:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No PNN associated to files in FrameWorkJobReport for "
#                msg += "No SEname found in FrameWorkJobReport for "
                msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId
                msg += " Workflow: %s"%fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)
            try:
                if ( insertDetectorData ):
                    dbsFiles = DBSWriterObjects.createDBSFiles(outFile,
                                                               fwkJobRep.jobType,
                                                               self.dbs)
                else:
                    dbsFiles = DBSWriterObjects.createDBSFiles(outFile,
                                                               fwkJobRep.jobType)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles:\n"
                msg += "Error creating DbsFile instances for file:\n"
                msg += "%s\n" % outFile['LFN']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if len(dbsFiles)<=0:
                msg="No DbsFile instances created. Not enough info in the FrameWorkJobReport for"
                msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId
                msg += " Workflow: %s"%fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)

            for f in dbsFiles:
                datasetName = makeDBSDSName(f)
                hashName = "%s-%s" % (pnn, datasetName)

                if hashName not in insertLists:
                    insertLists[hashName] = _InsertFileList(pnn,
                                                            datasetName)
                insertLists[hashName].append(f)

                if not orderedHashes.count(hashName):
                    orderedHashes.append(hashName)


        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        for hash in orderedHashes:

            fileList = insertLists[hash]
            procDataset = fileList[0]['Dataset']


            try:
                fileBlock = DBSWriterObjects.getDBSFileBlock(
                    self.dbs,
                    procDataset,
                    fileList.pnn)

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset
#                msg += "In Storage Element:\n %s\n" % fileList.seName
                msg += "In PNN:\n %s\n" % fileList.pnn
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if fwkJobRep.jobType == "Merge":
                #  //
                # // Merge files
                #//
                for mergedFile in fileList:
                    mergedFile['Block'] = fileBlock
                    affectedBlocks.add(fileBlock['Name'])
                    msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(mergedFile['ParentList']),str(mergedFile))
                    logging.debug(msg)
                    try:
                        self.dbs.insertMergedFile(mergedFile['ParentList'],
                                                  mergedFile)

                    except DbsException as ex:
                        msg = "Error in DBSWriter.insertFiles\n"
                        msg += "Cannot insert merged file:\n"
                        msg += "  %s\n" % mergedFile['LogicalFileName']
                        msg += "%s\n" % formatEx(ex)
                        raise DBSWriterError(msg)
                    logging.debug("Inserted merged file: %s to FileBlock: %s"%(mergedFile['LogicalFileName'],fileBlock['Name']))
            else:
                #  //
                # // Processing files
                #//
                affectedBlocks.add(fileBlock['Name'])
                msg="calling: self.dbs.insertFiles(%s, %s, %s)" % (str(procDataset),str(list(fileList)),str(fileBlock))
                logging.debug(msg)

                try:
                    self.dbs.insertFiles(procDataset, list(fileList),
                                         fileBlock)
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % (
                        [ x['LogicalFileName'] for x in fileList ],
                        )

                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
                logging.debug("Inserted files: %s to FileBlock: %s"%( ([ x['LogicalFileName'] for x in fileList ]),fileBlock['Name']))

        return list(affectedBlocks)

Example #8

Show file

File: DBSWriter.py Project: HassenRiahi/WMCore

    def insertFilesForDBSBuffer(self, files, procDataset, algos,
                                jobType = "NotMerge", insertDetectorData = False,
                                maxFiles = 100, maxSize = 99999999, timeOut = None,
                                fileCommitLength = 5):
        """
        _insertFiles_

        list of files inserted in DBS
        """
        #TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        affectedBlocks = []
        insertFiles =  []
        addedRuns=[]
        pnn = None

        #Get the algos in insertable form
        # logging.error("About to input algos")
        # logging.error(algos)
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos ]

        #print ialgos

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun=long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber = lrun,
                    NumberOfEvents = 0,
                    NumberOfLumiSections = 0,
                    TotalLuminosity = 0,
                    StoreNumber = 0,
                    StartOfRun = 0,
                    EndOfRun = 0,
                    )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(lrun) #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                            LumiSectionNumber = long(alsn),
                            StartEventNumber = 0,
                            EndEventNumber = 0,
                            LumiStartTime = 0,
                            LumiEndTime = 0,
                            RunNumber = lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                              #Checksum = str(outFile['cksum']),
                              NumberOfEvents = outFile['events'],
                              LogicalFileName = outFile['lfn'],
                              FileSize = int(outFile['size']),
                              Status = "VALID",
                              ValidationStatus = 'VALID',
                              FileType = 'EDM',
                              Dataset = procDataset,
                              TierList = DBSWriterObjects.makeTierList(procDataset['Path'].split('/')[3]),
                              AlgoList = ialgos,
                              LumiList = lumiList,
                              ParentList = outFile.getParentLFNs(),
                              #BranchHash = outFile['BranchHash'],
                            )
            #Set checksums by hand
            #dbsfile['Checksum'] = 0  #Set a default?
            for entry in outFile['checksums'].keys():
                #This should be a dictionary with a cktype key and cksum value
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])



            #This check comes from ProdAgent, not sure if its required
            if len(outFile["locations"]) > 0:
                pnn = list(outFile["locations"])[0]
                logging.debug("PNN associated to file is: %s"%pnn)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No PNN associated to file"
                #print "FAKING seName for now"
                #seName="cmssrm.fnal.gov"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks


        sumSize   = 0
        sumFiles  = 0
        tmpFiles  = []
        blockList = []
        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs,
                procDataset,
                pnn)
            fileBlock['files'] = []
            #if not fileBlock in affectedBlocks:
            #    affectedBlocks.append(fileBlock)
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)



        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
                                    maxSize = maxSize, timeOut = timeOut, algos = ialgos,
                                    filesToCommit = filesToCommit, procDataset = procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs,
                        procDataset,
                        pnn)
                    fileBlock['files'] = []
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                    # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
                    logging.debug("Inserted files: %s to FileBlock: %s" \
                                  % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)


        if len(filesToCommit) > 0:
            try:
                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                filesToCommit = []
                logging.debug("Inserted files: %s to FileBlock: %s" \
                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))

            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)


        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)




        ## Do bulk inserts now for DBS
        #filesToCommit = []
        #count         = 0
        #count2        = 0
        #for file in insertFiles:
        #    count += 1
        #    #Try and close the box
        #    logging.error("Should have a file")
        #    logging.error(len(filesToCommit))
        #    count2 += len(filesToCommit)
        #    if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
        #                            maxSize = maxSize, timeOut = timeOut, algos = ialgos,
        #                            filesToCommit = filesToCommit, procDataset = procDataset):
        #        fileBlock['OpenForWriting'] = '0'
        #        if not fileBlock in affectedBlocks:
        #            affectedBlocks.append(fileBlock)
        #
        #
        #
        #        # Then we need a new block
        #        try:
        #            fileBlock = DBSWriterObjects.getDBSFileBlock(
        #                self.dbs,
        #                procDataset,
        #                seName)
        #            fileBlock['files'] = []
        #        except DbsException, ex:
        #            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
        #            msg += "Cannot retrieve FileBlock for dataset:\n"
        #            msg += " %s\n" % procDataset['Path']
        #            msg += "%s\n" % formatEx(ex)
        #            raise DBSWriterError(msg)
        #    #At this point, we should commit the block as is
        #    fileBlock['files'].append(file['LogicalFileName'])
        #    if jobType == "MergeSpecial":
        #        for file in fileList:
        #            file['Block'] = fileBlock
        #            msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file))
        #            logging.debug(msg)
        #            try:
        #                #
        #                #
        #                # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well)
        #                self.dbs.insertMergedFile(file['ParentList'],
        #                                          file)
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert merged file:\n"
        #                msg += "  %s\n" % file['LogicalFileName']
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #            logging.debug("Inserted merged file: %s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name']))
        #    else:
        #        filesToCommit.append(file)
        #        if len(filesToCommit) >= fileCommitLength:
        #            # Only commit the files if there are more of them then the maximum length
        #            try:
        #                logging.error("About to commit %i files" %(len(filesToCommit)))
        #                count2 += len(filesToCommit)
        #                self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #                filesToCommit = []
        #                logging.debug("Inserted files: %s to FileBlock: %s" \
        #                              % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #            except DbsException, ex:
        #                msg = "Error in DBSWriter.insertFiles\n"
        #                msg += "Cannot insert processed files:\n"
        #                msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #                msg += "%s\n" % formatEx(ex)
        #                raise DBSWriterError(msg)
        #
        #
        #
        #
        ## If we still have files to commit, commit them
        #logging.error("Got to the end of the loop")
        #logging.error(len(filesToCommit))
        #logging.error(count2)
        #if len(filesToCommit) > 0:
        #    try:
        #        logging.error("About to insert some files")
        #        self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
        #        filesToCommit = []
        #        logging.debug("Inserted files: %s to FileBlock: %s" \
        #                      % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name']))
        #
        #    except DbsException, ex:
        #        msg = "Error in DBSWriter.insertFiles\n"
        #        msg += "Cannot insert processed files:\n"
        #        msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],)
        #        msg += "%s\n" % formatEx(ex)
        #        raise DBSWriterError(msg)


        if not fileBlock in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)