Example #1
0
    def insertFilesForDBSBuffer(
        self,
        files,
        procDataset,
        algos,
        jobType="NotMerge",
        insertDetectorData=False,
        maxFiles=100,
        maxSize=99999999,
        timeOut=None,
        fileCommitLength=5,
    ):
        """
        _insertFilesForDBSBuffer_

        Convert every entry of ``files`` into a DbsFile, registering each
        run in DBS the first time it is seen, and then look up the file
        block for ``procDataset``.

        :param files: file objects; each must provide ``getRuns()``,
            ``getParentLFNs()`` and the keys ``events``, ``lfn``, ``size``,
            ``checksums`` and ``locations``.
        :param procDataset: processed dataset; its ``Path`` entry is split
            on "/" and the fourth element is used to build the tier list.
        :param algos: algorithm descriptions; each one is copied with
            ``dict()`` and passed to
            ``DBSWriterObjects.createAlgorithmForInsert``.
        :param jobType: not read in the visible portion of this method.
        :param insertDetectorData: not read in the visible portion of this
            method (see the TODO below).
        :param maxFiles: not read in the visible portion of this method.
        :param maxSize: not read in the visible portion of this method.
        :param timeOut: not read in the visible portion of this method.
        :param fileCommitLength: not read in the visible portion of this
            method.
        :returns: None when ``files`` is empty.
        :raises DBSWriterError: when a file has no location, or when the
            file block cannot be retrieved from DBS.
        """
        # TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        # NOTE(review): affectedBlocks, sumSize, sumFiles, tmpFiles and
        # blockList are not read in the visible portion of this method; they
        # are kept untouched in case later code depends on them.
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        # Get the algos in insertable form
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos]

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            # //
            lumiList = []

            # Something similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                # Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(lrun)  # save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                # Iterating the run object yields its lumi section numbers
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                NumberOfEvents=outFile["events"],
                LogicalFileName=outFile["lfn"],
                FileSize=int(outFile["size"]),
                Status="VALID",
                ValidationStatus="VALID",
                FileType="EDM",
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(procDataset["Path"].split("/")[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
            )
            # Set checksums by hand: "checksums" maps a checksum type to its
            # value, and the type is matched case-insensitively.
            for entry in outFile["checksums"]:
                if entry.lower() == "cksum":
                    dbsfile["Checksum"] = str(outFile["checksums"][entry])
                elif entry.lower() == "adler32":
                    dbsfile["Adler32"] = str(outFile["checksums"][entry])
                elif entry.lower() == "md5":
                    dbsfile["Md5"] = str(outFile["checksums"][entry])

            # This check comes from ProdAgent, not sure if its required.
            # seName is overwritten per file, so the block lookup below uses
            # the first location of the last file processed.
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        # //  fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []
        # First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(self.dbs, procDataset, seName)
            fileBlock["files"] = []
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset["Path"]
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
Example #2
0
    def insertFiles(self, fwkJobRep, insertDetectorData=False):
        """
        _insertFiles_

        Insert the files of a FwkJobReport into DBS, grouped by storage
        element and dataset.

        Every file returned by ``fwkJobRep.sortFiles()`` is converted into
        DbsFile instances, which are grouped per (SEName, dataset name)
        pair. Each group is written to the file block returned by
        ``DBSWriterObjects.getDBSFileBlock``: one ``insertMergedFile`` call
        per file when ``fwkJobRep.jobType`` is "Merge", a single bulk
        ``insertFiles`` call otherwise.

        :param fwkJobRep: job report; ``files``, ``sortFiles()``,
            ``jobType``, ``jobSpecId`` and ``workflowSpecId`` are read.
        :param insertDetectorData: when true, ``self.dbs`` is additionally
            passed to ``DBSWriterObjects.createDBSFiles``.
        :returns: list with the names of all file blocks that were written
            to, for merged and unmerged files alike (built from a set, so
            the order is not defined).
        :raises DBSWriterError: when the report has no files, a file has no
            SEName, no DbsFile could be created for a file, the file block
            cannot be retrieved, or DBS rejects an insert.
        """

        insertLists = {}
        orderedHashes = []
        affectedBlocks = set()

        if not fwkJobRep.files:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "No files found in FrameWorkJobReport for:\n"
            msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
            msg += " Workflow: %s" % fwkJobRep.workflowSpecId
            raise DBSWriterError(msg)

        for outFile in fwkJobRep.sortFiles():
            #  //
            # // Convert each file into a DBS File object
            #//
            seName = None
            if "SEName" in outFile and outFile['SEName']:
                seName = outFile['SEName']
                logging.debug("SEname associated to file is: %s" % seName)

            # There is deliberately no fallback to the site se-name: a file
            # without an SEName most likely means a stage out problem.
            if not seName:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to files in FrameWorkJobReport for "
                msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
                msg += " Workflow: %s" % fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)

            try:
                if insertDetectorData:
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType, self.dbs)
                else:
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles:\n"
                msg += "Error creating DbsFile instances for file:\n"
                msg += "%s\n" % outFile['LFN']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if not dbsFiles:
                # Trailing space keeps "for" from running into "==>"
                msg = "No DbsFile instances created. Not enough info in the FrameWorkJobReport for "
                msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId
                msg += " Workflow: %s" % fwkJobRep.workflowSpecId
                raise DBSWriterError(msg)

            for dbsFile in dbsFiles:
                datasetName = makeDBSDSName(dbsFile)
                hashName = "%s-%s" % (seName, datasetName)

                if hashName not in insertLists:
                    insertLists[hashName] = _InsertFileList(
                        seName, datasetName)
                    # Remember first-seen order; the dict alone does not
                    # guarantee it on every Python version.
                    orderedHashes.append(hashName)
                insertLists[hashName].append(dbsFile)

        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        for hashName in orderedHashes:

            fileList = insertLists[hashName]
            # Every list holds at least one file: it is created right before
            # its first append above.
            procDataset = fileList[0]['Dataset']

            try:
                fileBlock = DBSWriterObjects.getDBSFileBlock(
                    self.dbs, procDataset, fileList.seName)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset
                msg += "In Storage Element:\n %s\n" % fileList.seName
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            # The block is reported as affected before any insert is tried
            affectedBlocks.add(fileBlock['Name'])

            if fwkJobRep.jobType == "Merge":
                #  //
                # // Merge files: inserted one by one
                #//
                for mergedFile in fileList:
                    mergedFile['Block'] = fileBlock
                    msg = "calling: self.dbs.insertMergedFile(%s, %s)" % (str(
                        mergedFile['ParentList']), str(mergedFile))
                    logging.debug(msg)
                    try:
                        self.dbs.insertMergedFile(mergedFile['ParentList'],
                                                  mergedFile)
                    except DbsException as ex:
                        msg = "Error in DBSWriter.insertFiles\n"
                        msg += "Cannot insert merged file:\n"
                        msg += "  %s\n" % mergedFile['LogicalFileName']
                        msg += "%s\n" % formatEx(ex)
                        raise DBSWriterError(msg)
                    logging.debug(
                        "Inserted merged file: %s to FileBlock: %s" %
                        (mergedFile['LogicalFileName'], fileBlock['Name']))
            else:
                #  //
                # // Processing files: inserted in one bulk call
                #//
                fileNames = [x['LogicalFileName'] for x in fileList]
                msg = "calling: self.dbs.insertFiles(%s, %s, %s)" % (
                    str(procDataset), str(list(fileList)), str(fileBlock))
                logging.debug(msg)

                try:
                    self.dbs.insertFiles(procDataset, list(fileList),
                                         fileBlock)
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % (fileNames, )
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
                logging.debug("Inserted files: %s to FileBlock: %s" %
                              (fileNames, fileBlock['Name']))

        return list(affectedBlocks)
Example #3
0
            # First see if the block is full
            if self.manageFileBlock(
                fileBlock=fileBlock,
                maxFiles=maxFiles,
                maxSize=maxSize,
                timeOut=timeOut,
                algos=ialgos,
                filesToCommit=filesToCommit,
                procDataset=procDataset,
            ):
                fileBlock["OpenForWriting"] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(self.dbs, procDataset, seName)
                    fileBlock["files"] = []
                except DbsException, ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset["Path"]
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock["files"].append(file["LogicalFileName"])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit once at least fileCommitLength files are pending
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
Example #4
0
    def insertFilesForDBSBuffer(self,
                                files,
                                procDataset,
                                algos,
                                jobType="NotMerge",
                                insertDetectorData=False,
                                maxFiles=100,
                                maxSize=99999999,
                                timeOut=None,
                                fileCommitLength=5):
        """
        _insertFilesForDBSBuffer_

        Convert ``files`` into DbsFile objects and insert them into DBS in
        batches, switching to a freshly looked-up file block whenever
        ``self.manageFileBlock`` returns a true value for the current one.

        :param files: file objects; each must provide ``getRuns()``,
            ``getParentLFNs()`` and the keys ``events``, ``lfn``, ``size``,
            ``checksums`` and ``locations``.
        :param procDataset: processed dataset; its ``Path`` entry is split
            on "/" and the fourth element is used to build the tier list.
        :param algos: algorithm descriptions; each one is copied with
            ``dict()`` and passed to
            ``DBSWriterObjects.createAlgorithmForInsert``.
        :param jobType: accepted for interface compatibility; not read.
        :param insertDetectorData: accepted for interface compatibility;
            not read (see the TODO below).
        :param maxFiles: forwarded unchanged to ``self.manageFileBlock``.
        :param maxSize: forwarded unchanged to ``self.manageFileBlock``.
        :param timeOut: forwarded unchanged to ``self.manageFileBlock``.
        :param fileCommitLength: number of pending files at which a batch
            is sent to ``self.dbs.insertFiles``.
        :returns: None when ``files`` is empty; otherwise the list of file
            block objects (not names) that were used.
        :raises DBSWriterError: when a file has no location, the file block
            cannot be retrieved, or DBS rejects an insert.
        """
        #TODO: Whats the purpose of insertDetectorData

        if not files:
            return

        affectedBlocks = []
        insertFiles = []
        addedRuns = set()
        seName = None
        # Checksum type (compared lower-cased) -> DbsFile field name
        checksumFields = {'cksum': 'Checksum',
                          'adler32': 'Adler32',
                          'md5': 'Md5'}

        #Get the algos in insertable form
        ialgos = [
            DBSWriterObjects.createAlgorithmForInsert(dict(algo))
            for algo in algos
        ]

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                #Only insert a run once per call, why waste a call to DBS
                if lrun not in addedRuns:
                    run = DbsRun(
                        RunNumber=lrun,
                        NumberOfEvents=0,
                        NumberOfLumiSections=0,
                        TotalLuminosity=0,
                        StoreNumber=0,
                        StartOfRun=0,
                        EndOfRun=0,
                    )
                    self.dbs.insertRun(run)
                    addedRuns.add(lrun)
                    logging.debug("run %s added to DBS " % str(lrun))
                #Iterating the run object yields its lumi section numbers
                for alsn in runlumiinfo:
                    lumiList.append(
                        DbsLumiSection(
                            LumiSectionNumber=long(alsn),
                            StartEventNumber=0,
                            EndEventNumber=0,
                            LumiStartTime=0,
                            LumiEndTime=0,
                            RunNumber=lrun,
                        ))

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
            )
            #Set checksums by hand; unknown checksum types are ignored
            for ckType, ckValue in outFile['checksums'].items():
                fieldName = checksumFields.get(ckType.lower())
                if fieldName is not None:
                    dbsfile[fieldName] = str(ckValue)

            #This check comes from ProdAgent, not sure if its required.
            #seName is overwritten per file, so every block lookup below
            #uses the first location of the last file processed.
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)

        def openBlock():
            """Fetch the file block for procDataset/seName with an empty 'files' list."""
            try:
                block = DBSWriterObjects.getDBSFileBlock(
                    self.dbs, procDataset, seName)
                block['files'] = []
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset['Path']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            return block

        def commitBatch(batch, block):
            """Insert ``batch`` into ``block``, reporting exactly the files in the batch."""
            batchNames = [x['LogicalFileName'] for x in batch]
            try:
                self.dbs.insertFiles(procDataset, batch, block)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % (batchNames, )
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            logging.debug("Inserted files: %s to FileBlock: %s" %
                          (batchNames, block['Name']))

        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        #First, get the block.  See if the block already exists
        fileBlock = openBlock()

        filesToCommit = []
        for dbsFile in insertFiles:
            # A true return value means the current block is done with: mark
            # it closed locally and continue with a newly fetched block.
            if self.manageFileBlock(fileBlock=fileBlock,
                                    maxFiles=maxFiles,
                                    maxSize=maxSize,
                                    timeOut=timeOut,
                                    algos=ialgos,
                                    filesToCommit=filesToCommit,
                                    procDataset=procDataset):
                fileBlock['OpenForWriting'] = 0
                if fileBlock not in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                fileBlock = openBlock()

            fileBlock['files'].append(dbsFile['LogicalFileName'])
            filesToCommit.append(dbsFile)
            # Only commit once at least fileCommitLength files are pending
            if len(filesToCommit) >= fileCommitLength:
                commitBatch(filesToCommit, fileBlock)
                # Rebind rather than clear: the committed list object is
                # left untouched for whoever still holds a reference to it.
                filesToCommit = []

        # Flush whatever is left below the batch threshold
        if filesToCommit:
            commitBatch(filesToCommit, fileBlock)
            filesToCommit = []

        if fileBlock not in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)
Example #5
0
    def insertFilesForDBSBuffer(self,
                                files,
                                procDataset,
                                algos,
                                jobType="NotMerge",
                                insertDetectorData=False,
                                maxFiles=100,
                                maxSize=99999999,
                                timeOut=None,
                                fileCommitLength=5):
        """
        _insertFilesForDBSBuffer_

        Convert every entry of ``files`` into a DbsFile, registering each
        run in DBS the first time it is seen, and then look up the file
        block for ``procDataset``.

        :param files: file objects; each must provide ``getRuns()``,
            ``getParentLFNs()`` and the keys ``events``, ``lfn``, ``size``,
            ``checksums`` and ``locations``.
        :param procDataset: processed dataset; its ``Path`` entry is split
            on "/" and the fourth element is used to build the tier list.
        :param algos: algorithm descriptions; each one is copied with
            ``dict()`` and passed to
            ``DBSWriterObjects.createAlgorithmForInsert``.
        :param jobType: not read in the visible portion of this method.
        :param insertDetectorData: not read in the visible portion of this
            method (see the TODO below).
        :param maxFiles: not read in the visible portion of this method.
        :param maxSize: not read in the visible portion of this method.
        :param timeOut: not read in the visible portion of this method.
        :param fileCommitLength: not read in the visible portion of this
            method.
        :returns: None when ``files`` is empty.
        :raises DBSWriterError: when a file has no location, or when the
            file block cannot be retrieved from DBS.
        """
        #TODO: Whats the purpose of insertDetectorData

        if len(files) < 1:
            return
        # NOTE(review): affectedBlocks, sumSize, sumFiles, tmpFiles and
        # blockList are not read in the visible portion of this method; they
        # are kept untouched in case later code depends on them.
        affectedBlocks = []
        insertFiles = []
        addedRuns = []
        seName = None

        #Get the algos in insertable form
        ialgos = [
            DBSWriterObjects.createAlgorithmForInsert(dict(algo))
            for algo in algos
        ]

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            #Something similar should be the real deal when multiple runs/lumi could be returned from wmbs file

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                run = DbsRun(
                    RunNumber=lrun,
                    NumberOfEvents=0,
                    NumberOfLumiSections=0,
                    TotalLuminosity=0,
                    StoreNumber=0,
                    StartOfRun=0,
                    EndOfRun=0,
                )
                #Only added if not added by another file in this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    self.dbs.insertRun(run)
                    addedRuns.append(
                        lrun)  #save it so we do not try to add it again to DBS
                    logging.debug("run %s added to DBS " % str(lrun))
                #Iterating the run object yields its lumi section numbers
                for alsn in runlumiinfo:
                    lumi = DbsLumiSection(
                        LumiSectionNumber=long(alsn),
                        StartEventNumber=0,
                        EndEventNumber=0,
                        LumiStartTime=0,
                        LumiEndTime=0,
                        RunNumber=lrun,
                    )
                    lumiList.append(lumi)

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                NumberOfEvents=outFile['events'],
                LogicalFileName=outFile['lfn'],
                FileSize=int(outFile['size']),
                Status="VALID",
                ValidationStatus='VALID',
                FileType='EDM',
                Dataset=procDataset,
                TierList=DBSWriterObjects.makeTierList(
                    procDataset['Path'].split('/')[3]),
                AlgoList=ialgos,
                LumiList=lumiList,
                ParentList=outFile.getParentLFNs(),
            )
            #Set checksums by hand: "checksums" maps a checksum type to its
            #value, and the type is matched case-insensitively.
            for entry in outFile['checksums']:
                if entry.lower() == 'cksum':
                    dbsfile['Checksum'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'adler32':
                    dbsfile['Adler32'] = str(outFile['checksums'][entry])
                elif entry.lower() == 'md5':
                    dbsfile['Md5'] = str(outFile['checksums'][entry])

            #This check comes from ProdAgent, not sure if its required.
            #seName is overwritten per file, so the block lookup below uses
            #the first location of the last file processed.
            if len(outFile["locations"]) > 0:
                seName = list(outFile["locations"])[0]
                logging.debug("SEname associated to file is: %s" % seName)
            else:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No SEname associated to file"
                raise DBSWriterError(msg)
            insertFiles.append(dbsfile)
        #  //Processing Jobs:
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks

        sumSize = 0
        sumFiles = 0
        tmpFiles = []
        blockList = []
        #First, get the block.  See if the block already exists
        try:
            fileBlock = DBSWriterObjects.getDBSFileBlock(
                self.dbs, procDataset, seName)
            fileBlock['files'] = []
        except DbsException as ex:
            msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
            msg += "Cannot retrieve FileBlock for dataset:\n"
            msg += " %s\n" % procDataset['Path']
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
Example #6
0
        filesToCommit = []
        for file in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock=fileBlock,
                                    maxFiles=maxFiles,
                                    maxSize=maxSize,
                                    timeOut=timeOut,
                                    algos=ialgos,
                                    filesToCommit=filesToCommit,
                                    procDataset=procDataset):
                fileBlock['OpenForWriting'] = 0
                if not fileBlock in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                try:
                    fileBlock = DBSWriterObjects.getDBSFileBlock(
                        self.dbs, procDataset, seName)
                    fileBlock['files'] = []
                except DbsException, ex:
                    msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                    msg += "Cannot retrieve FileBlock for dataset:\n"
                    msg += " %s\n" % procDataset['Path']
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)

            fileBlock['files'].append(file['LogicalFileName'])
            filesToCommit.append(file)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit the files if there are more of them then the maximum length
                try:
                    self.dbs.insertFiles(procDataset, filesToCommit, fileBlock)
                    filesToCommit = []
Example #7
0
    def insertFiles(self, fwkJobRep, insertDetectorData = False):
        """
        _insertFiles_

        Process the files in the FwkJobReport instance and insert
        them into the associated datasets.

        Each output file is converted into DbsFile objects, which are
        grouped by (PNN, dataset name).  Every group is then inserted into
        the file block returned by DBSWriterObjects.getDBSFileBlock: one
        insertMergedFile call per file when the job type is "Merge", a
        single bulk insertFiles call for any other job type.

        Arguments:
          fwkJobRep -- job report; its files, jobType, jobSpecId and
              workflowSpecId attributes and its sortFiles() method are used
          insertDetectorData -- when true, self.dbs is also handed to
              DBSWriterObjects.createDBSFiles

        Returns the names of all file blocks that received files, for
        merged and unmerged blocks alike, as a list without duplicates
        (the order is not significant).

        Raises DBSWriterError when the report contains no files, a file has
        no PNN, no DbsFile could be built for a file, or one of the wrapped
        DBS calls raises DbsException.
        """

        def jobContext():
            # Identifies the offending job in error messages.  Built on
            # demand so the report attributes are only read on failure.
            return "==> JobSpecId: %s Workflow: %s" % (
                fwkJobRep.jobSpecId, fwkJobRep.workflowSpecId)

        insertLists = {}        # "<pnn>-<dataset>" -> _InsertFileList
        orderedHashes = []      # keys of insertLists, in first-seen order
        affectedBlocks = set()

        if len(fwkJobRep.files) <= 0:
            msg = "Error in DBSWriter.insertFiles\n"
            msg += "No files found in FrameWorkJobReport for:\n"
            msg += jobContext()
            raise DBSWriterError(msg)

        for outFile in fwkJobRep.sortFiles():
            #  //
            # // Convert each file into a DBS File object
            #//
            pnn = outFile['PNN'] if "PNN" in outFile else None
            # There is deliberately no fallback to the site se-name: a file
            # without a location most likely points to a stage out problem.
            if not pnn:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "No PNN associated to files in FrameWorkJobReport for "
                msg += jobContext()
                raise DBSWriterError(msg)
            logging.debug("PNN associated to file is: %s", pnn)

            try:
                if insertDetectorData:
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType, self.dbs)
                else:
                    dbsFiles = DBSWriterObjects.createDBSFiles(
                        outFile, fwkJobRep.jobType)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles:\n"
                msg += "Error creating DbsFile instances for file:\n"
                msg += "%s\n" % outFile['LFN']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            if len(dbsFiles) <= 0:
                # The trailing blank keeps "for" and "==>" apart.
                msg = "No DbsFile instances created. Not enough info in the FrameWorkJobReport for "
                msg += jobContext()
                raise DBSWriterError(msg)

            for dbsFile in dbsFiles:
                datasetName = makeDBSDSName(dbsFile)
                hashName = "%s-%s" % (pnn, datasetName)
                if hashName not in insertLists:
                    # First file of this (PNN, dataset) pair: open its list
                    # and remember the order in which the pairs showed up.
                    insertLists[hashName] = _InsertFileList(pnn, datasetName)
                    orderedHashes.append(hashName)
                insertLists[hashName].append(dbsFile)

        #  //
        # // Insert the lists of sorted files into the appropriate
        #//  fileblocks
        for hashName in orderedHashes:
            fileList = insertLists[hashName]
            procDataset = fileList[0]['Dataset']

            try:
                fileBlock = DBSWriterObjects.getDBSFileBlock(
                    self.dbs, procDataset, fileList.pnn)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFiles\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset
                msg += "In PNN:\n %s\n" % fileList.pnn
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            # Every list holds at least one file, so the block is affected
            # no matter which branch below handles it.
            affectedBlocks.add(fileBlock['Name'])

            if fwkJobRep.jobType == "Merge":
                #  //
                # // Merge files: inserted one at a time
                #//
                for mergedFile in fileList:
                    mergedFile['Block'] = fileBlock
                    logging.debug("calling: self.dbs.insertMergedFile(%s, %s)",
                                  mergedFile['ParentList'], mergedFile)
                    try:
                        self.dbs.insertMergedFile(mergedFile['ParentList'],
                                                  mergedFile)
                    except DbsException as ex:
                        msg = "Error in DBSWriter.insertFiles\n"
                        msg += "Cannot insert merged file:\n"
                        msg += "  %s\n" % mergedFile['LogicalFileName']
                        msg += "%s\n" % formatEx(ex)
                        raise DBSWriterError(msg)
                    logging.debug("Inserted merged file: %s to FileBlock: %s",
                                  mergedFile['LogicalFileName'],
                                  fileBlock['Name'])
            else:
                #  //
                # // Processing files: one bulk insert per list
                #//
                filesForInsert = list(fileList)
                logging.debug("calling: self.dbs.insertFiles(%s, %s, %s)",
                              procDataset, filesForInsert, fileBlock)
                try:
                    self.dbs.insertFiles(procDataset, filesForInsert,
                                         fileBlock)
                except DbsException as ex:
                    msg = "Error in DBSWriter.insertFiles\n"
                    msg += "Cannot insert processed files:\n"
                    msg += " %s\n" % (
                        [ x['LogicalFileName'] for x in fileList ],
                        )
                    msg += "%s\n" % formatEx(ex)
                    raise DBSWriterError(msg)
                logging.debug("Inserted files: %s to FileBlock: %s",
                              [ x['LogicalFileName'] for x in fileList ],
                              fileBlock['Name'])

        return list(affectedBlocks)
Example #8
0
    def insertFilesForDBSBuffer(self, files, procDataset, algos,
                                jobType = "NotMerge", insertDetectorData = False,
                                maxFiles = 100, maxSize = 99999999, timeOut = None,
                                fileCommitLength = 5):
        """
        _insertFilesForDBSBuffer_

        Convert the given files into DbsFile objects and insert them into
        DBS in batches, opening a new file block whenever manageFileBlock
        reports a true value for the current one.

        Arguments:
          files -- files to insert; each must provide getRuns(),
              getParentLFNs() and the keys 'events', 'lfn', 'size',
              'checksums' and 'locations'
          procDataset -- dataset the files belong to; procDataset['Path']
              is used to derive the tier list
          algos -- algorithms, each converted with
              DBSWriterObjects.createAlgorithmForInsert
          jobType -- not used by this method
          insertDetectorData -- not used by this method
              (TODO: what is the purpose of insertDetectorData here?)
          maxFiles, maxSize, timeOut -- passed through to manageFileBlock
          fileCommitLength -- number of pending files that triggers a
              bulk insertFiles call

        Returns the list of file blocks that were written to.  When files
        is empty nothing is done and None is returned.

        Raises DBSWriterError when a file has no location, or when
        retrieving a file block or inserting files raises DbsException.

        NOTE: all blocks are looked up with the PNN of the last file that
        was converted (first entry of its 'locations').
        """
        if len(files) < 1:
            return

        affectedBlocks = []
        insertFiles = []
        addedRuns = set()
        pnn = None

        # Get the algos in insertable form
        ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos]

        # checksum type (lower case) -> DbsFile key that stores it
        checksumFields = {'cksum': 'Checksum', 'adler32': 'Adler32', 'md5': 'Md5'}

        for outFile in files:
            #  //
            # // Convert each file into a DBS File object
            #//
            lumiList = []

            for runlumiinfo in outFile.getRuns():
                lrun = long(runlumiinfo.run)
                # Only insert runs not already added by another file in
                # this loop, why waste a call to DBS
                if lrun not in addedRuns:
                    run = DbsRun(
                        RunNumber = lrun,
                        NumberOfEvents = 0,
                        NumberOfLumiSections = 0,
                        TotalLuminosity = 0,
                        StoreNumber = 0,
                        StartOfRun = 0,
                        EndOfRun = 0,
                        )
                    self.dbs.insertRun(run)
                    addedRuns.add(lrun)
                    logging.debug("run %s added to DBS ", lrun)
                lumiList.extend([
                    DbsLumiSection(
                        LumiSectionNumber = long(alsn),
                        StartEventNumber = 0,
                        EndEventNumber = 0,
                        LumiStartTime = 0,
                        LumiEndTime = 0,
                        RunNumber = lrun,
                    )
                    for alsn in runlumiinfo
                ])

            logging.debug("lumi list created for the file")

            dbsfile = DbsFile(
                NumberOfEvents = outFile['events'],
                LogicalFileName = outFile['lfn'],
                FileSize = int(outFile['size']),
                Status = "VALID",
                ValidationStatus = 'VALID',
                FileType = 'EDM',
                Dataset = procDataset,
                TierList = DBSWriterObjects.makeTierList(procDataset['Path'].split('/')[3]),
                AlgoList = ialgos,
                LumiList = lumiList,
                ParentList = outFile.getParentLFNs(),
                )

            # Checksums are set by hand; unknown checksum types are ignored.
            for cktype, ckvalue in outFile['checksums'].items():
                field = checksumFields.get(cktype.lower())
                if field is not None:
                    dbsfile[field] = str(ckvalue)

            # This check comes from ProdAgent, not sure if its required
            locations = list(outFile["locations"])
            if not locations:
                msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                msg += "No PNN associated to file"
                raise DBSWriterError(msg)
            pnn = locations[0]
            logging.debug("PNN associated to file is: %s", pnn)

            insertFiles.append(dbsfile)

        def openBlock():
            # Fetch the block to write to for this dataset/PNN and attach
            # an empty list that collects the LFNs added during this call.
            try:
                block = DBSWriterObjects.getDBSFileBlock(
                    self.dbs,
                    procDataset,
                    pnn)
                block['files'] = []
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                msg += "Cannot retrieve FileBlock for dataset:\n"
                msg += " %s\n" % procDataset['Path']
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            return block

        def commit(batch, block):
            # Bulk-insert one batch.  The messages name the files of this
            # batch only, so they state what actually went into the block.
            lfns = [ x['LogicalFileName'] for x in batch ]
            try:
                self.dbs.insertFiles(procDataset, batch, block)
            except DbsException as ex:
                msg = "Error in DBSWriter.insertFilesForDBSBuffer\n"
                msg += "Cannot insert processed files:\n"
                msg += " %s\n" % (lfns,)
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            logging.debug("Inserted files: %s to FileBlock: %s",
                          lfns, block['Name'])

        #  //
        # // Insert the files into the appropriate fileblocks
        #//
        # First, get the block.  See if the block already exists
        fileBlock = openBlock()

        filesToCommit = []
        for dbsFile in insertFiles:
            # First see if the block is full
            if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles,
                                    maxSize = maxSize, timeOut = timeOut, algos = ialgos,
                                    filesToCommit = filesToCommit, procDataset = procDataset):
                fileBlock['OpenForWriting'] = 0
                if fileBlock not in affectedBlocks:
                    affectedBlocks.append(fileBlock)
                # Then we need a new block
                fileBlock = openBlock()

            fileBlock['files'].append(dbsFile['LogicalFileName'])
            filesToCommit.append(dbsFile)
            if len(filesToCommit) >= fileCommitLength:
                # Only commit once enough files are pending
                commit(filesToCommit, fileBlock)
                filesToCommit = []

        # Commit whatever is left over from the last partial batch
        if len(filesToCommit) > 0:
            commit(filesToCommit, fileBlock)

        if fileBlock not in affectedBlocks:
            affectedBlocks.append(fileBlock)

        return list(affectedBlocks)