def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize=100,
                  user="******", group="cmsdataops"):
    """
    _getChunkFiles_

    Retrieve a chunk of files from the given collection and task.
    """
    chunkFiles = []

    files = self._getFilesetInfo(collectionName, filesetName, user, group,
                                 chunkOffset, chunkSize)
    files = mergeFakeFiles(files)

    for fileInfo in files:
        newFile = File(lfn=fileInfo["lfn"], size=fileInfo["size"],
                       events=fileInfo["events"],
                       parents=set(fileInfo["parents"]),
                       locations=set(fileInfo["locations"]),
                       merged=fileInfo["merged"])
        for run in fileInfo["runs"]:
            newRun = Run(run["run_number"])
            newRun.extend(run["lumis"])
            newFile.addRun(newRun)
        chunkFiles.append(newFile)

    return chunkFiles
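# --- Illustrative sketch (not part of the original source) ---
# The loop above assumes each element returned by _getFilesetInfo() and
# mergeFakeFiles() is a mapping with at least the keys shown below.  The
# values here are invented placeholders that only document the expected
# shape; real records come from the ACDC fileset documents.
exampleFileInfo = {
    "lfn": "/store/unmerged/example/file.root",      # hypothetical LFN
    "size": 1024,
    "events": 100,
    "parents": ["/store/data/example/parent.root"],  # hypothetical parent LFN
    "locations": ["T2_Example_Site"],                # hypothetical site name
    "merged": False,
    "runs": [{"run_number": 180612, "lumis": [5, 6, 7]}],
}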
def findUploadableFilesByDAS(self, datasetpath):
    """
    _findUploadableFilesByDAS_

    Find all the uploadable files for a given DatasetPath.
    """
    dbsFiles = []

    findFiles = self.daoFactory(classname="LoadDBSFilesByDAS")
    results = findFiles.execute(datasetpath=datasetpath, transaction=False)

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    return dbsFiles
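# --- Illustrative sketch (not part of the original source) ---
# The DBSBufferFile loaders in this section all share the same
# post-processing: the DAO returns a flat 'runInfo' mapping of run number
# to lumi list plus a 'parentLFNs' list, which are then converted into Run
# objects and parent DBSBufferFile objects.  The helper below mirrors that
# conversion on plain dictionaries so the transformation is easy to see;
# it is a standalone sketch and is not used by the loaders themselves.
def expandRunInfo(runInfo):
    """Turn {runNumber: [lumi, ...]} into a list of (runNumber, sorted lumis)."""
    return [(runNumber, sorted(lumis)) for runNumber, lumis in runInfo.items()]

# Example: two runs with their lumi sections
print(expandRunInfo({180612: [5, 6, 7], 180613: [1]}))
# [(180612, [5, 6, 7]), (180613, [1])]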
def findUploadableFilesByDAS(self, datasetpath):
    """
    _findUploadableFilesByDAS_

    Find all the uploadable files for a given DatasetPath.
    """
    dbsFiles = []

    findFiles = self.daoFactory(classname = "LoadDBSFilesByDAS")
    results = findFiles.execute(datasetpath = datasetpath, transaction = False)

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber = r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn = lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    return dbsFiles
def loadFilesByBlock(self, blockname):
    """
    _loadFilesByBlock_

    Get all files associated with a block
    """
    dbsFiles = []

    findFiles = self.daoFactory(classname = "LoadFilesByBlock")
    results = findFiles.execute(blockname = blockname, transaction = False)

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber = r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn = lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    return dbsFiles
def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize = 100,
                  user = "******", group = "cmsdataops"):
    """
    _getChunkFiles_

    Retrieve a chunk of files from the given collection and task.
    """
    chunkFiles = []

    result = self.couchdb.loadView("ACDC", "owner_coll_fileset_files",
                                   {"startkey": [group, user, collectionName, filesetName],
                                    "endkey": [group, user, collectionName, filesetName, {}],
                                    "limit": chunkSize,
                                    "skip": chunkOffset,
                                    }, [])

    for row in result["rows"]:
        resultRow = row['value']
        newFile = File(lfn = resultRow["lfn"], size = resultRow["size"],
                       events = resultRow["events"],
                       parents = set(resultRow["parents"]),
                       locations = set(resultRow["locations"]),
                       merged = resultRow["merged"])
        for run in resultRow["runs"]:
            newRun = Run(run["run_number"])
            newRun.extend(run["lumis"])
            newFile.addRun(newRun)
        chunkFiles.append(newFile)

    return chunkFiles
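# --- Illustrative sketch (not part of the original source) ---
# The view query above selects every row whose key starts with
# [group, user, collectionName, filesetName]: in CouchDB view collation an
# empty object {} sorts after every other value, so appending it to the
# endkey turns the startkey/endkey pair into a prefix range.  chunkOffset
# and chunkSize are passed straight through as the view's skip/limit
# options.  The identifiers below are placeholders, not real data.
exampleViewOptions = {
    "startkey": ["cmsdataops", "someuser", "ExampleCollection", "/ExampleTask"],
    "endkey": ["cmsdataops", "someuser", "ExampleCollection", "/ExampleTask", {}],
    "limit": 100,   # chunkSize
    "skip": 0,      # chunkOffset
}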
def loadFilesByBlock(self, blockname):
    """
    _loadFilesByBlock_

    Get all files associated with a block
    """
    dbsFiles = []

    findFiles = self.daoFactory(classname="LoadFilesByBlock")
    results = findFiles.execute(blockname=blockname, transaction=False)

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    return dbsFiles
def prepareDBSFiles(self):
    """
    _prepareDBSFiles_

    Retrieve the information from the JSON input data and create
    DBSFile objects that can be registered in the database.
    """
    timestamp = time.strftime('%m%d%y_%H%M%S')
    for fileEntry in self.inputData:
        # Get all the info out of a standard named dataset
        datasetInfo = str(fileEntry["dataset"])
        tokens = datasetInfo.split('/')
        primDs = tokens[1]
        procDs = tokens[2]
        dataTier = tokens[3]
        procDsTokens = procDs.split('-')
        acqEra = procDsTokens[0]
        procVer = procDsTokens[-1][1:]

        ckSumInfo = fileEntry["checksums"]
        for entry in ckSumInfo:
            ckSumInfo[entry] = str(ckSumInfo[entry])

        # Build the basic dbsBuffer file
        dbsFile = DBSBufferFile(lfn = str(fileEntry["lfn"]),
                                size = int(fileEntry.get("size", 0)),
                                events = int(fileEntry.get("events", 0)),
                                checksums = ckSumInfo,
                                status = "NOTUPLOADED")
        dbsFile.setAlgorithm(appName = "cmsRun",
                             appVer = str(fileEntry.get("cmssw", "LEGACY")),
                             appFam = "Legacy",
                             psetHash = "GIBBERISH",
                             configContent = "None;;None;;None")

        dbsFile.setDatasetPath("/%s/%s/%s" % (primDs, procDs, dataTier))
        dbsFile.setValidStatus(validStatus = "PRODUCTION")
        dbsFile.setProcessingVer(ver = procVer)
        dbsFile.setAcquisitionEra(era = acqEra)
        dbsFile.setGlobalTag(globalTag = str(fileEntry.get('globalTag', "LEGACY")))

        # Build a representative task name
        dbsFile['task'] = '/LegacyInsertionTask_%s/Insertion' % timestamp

        # Get the runs and lumis
        runsAndLumis = fileEntry.get("runsAndLumis", {})
        for run in runsAndLumis:
            newRun = Run(runNumber = int(run))
            newRun.extend([int(x) for x in runsAndLumis[run]])
            dbsFile.addRun(newRun)

        # Complete the file information with the location and queue it
        dbsFile.setLocation(se = str(fileEntry["location"]), immediateSave = False)
        self.dbsFilesToCreate.append(dbsFile)

    self.inputData = None
    return
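# --- Illustrative sketch (not part of the original source) ---
# prepareDBSFiles() expects self.inputData to be a list of records shaped
# roughly like the one below, and it derives the acquisition era and
# processing version from the processed-dataset name.  The dataset, LFN and
# location values are invented placeholders that only illustrate the
# parsing; real records come from the JSON input file.
exampleEntry = {
    "lfn": "/store/data/Run2011A/ExamplePrimary/RECO/v1/000/000001/file.root",
    "dataset": "/ExamplePrimary/Run2011A-PromptReco-v1/RECO",
    "size": 123456789,
    "events": 5000,
    "checksums": {"adler32": "abcd1234"},
    "runsAndLumis": {"1": [1, 2, 3]},
    "location": "example.site.se",
}

tokens = exampleEntry["dataset"].split('/')
primDs, procDs, dataTier = tokens[1], tokens[2], tokens[3]
procDsTokens = procDs.split('-')
acqEra = procDsTokens[0]           # "Run2011A"
procVer = procDsTokens[-1][1:]     # "1" (leading "v" stripped)
print(primDs, procDs, dataTier, acqEra, procVer)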
def loadDBSBufferFilesBulk(self, fileObjs):
    """
    _loadDBSBufferFilesBulk_

    Yes, this is a stupid place to put it.
    No, there's no better place.
    """
    myThread = threading.currentThread()

    dbsFiles = []

    existingTransaction = self.beginTransaction()

    factory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                         logger = myThread.logger,
                         dbinterface = myThread.dbi)

    binds = []
    for f in fileObjs:
        binds.append(f["id"])

    loadFiles = factory(classname = "DBSBufferFiles.LoadBulkFilesByID")
    results = loadFiles.execute(files = binds,
                                conn = self.getDBConn(),
                                transaction = self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber = r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn = lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    self.commitTransaction(existingTransaction)

    return dbsFiles
def addFileToDBS(self, jobReportFile, task, errorDataset=False):
    """
    _addFileToDBS_

    Add a file that was output from a job to the DBS buffer.
    """
    datasetInfo = jobReportFile["dataset"]

    dbsFile = DBSBufferFile(lfn=jobReportFile["lfn"],
                            size=jobReportFile["size"],
                            events=jobReportFile["events"],
                            checksums=jobReportFile["checksums"],
                            status="NOTUPLOADED")
    dbsFile.setAlgorithm(appName=datasetInfo["applicationName"],
                         appVer=datasetInfo["applicationVersion"],
                         appFam=jobReportFile["module_label"],
                         psetHash="GIBBERISH",
                         configContent=jobReportFile.get('configURL'))

    if errorDataset:
        dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"] + "-Error",
                                              datasetInfo["processedDataset"],
                                              datasetInfo["dataTier"]))
    else:
        dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"],
                                              datasetInfo["processedDataset"],
                                              datasetInfo["dataTier"]))

    dbsFile.setValidStatus(validStatus=jobReportFile.get("validStatus", None))
    dbsFile.setProcessingVer(ver=jobReportFile.get('processingVer', None))
    dbsFile.setAcquisitionEra(era=jobReportFile.get('acquisitionEra', None))
    dbsFile.setGlobalTag(globalTag=jobReportFile.get('globalTag', None))
    # TODO need to find where to get the prep id
    dbsFile.setPrepID(prep_id=jobReportFile.get('prep_id', None))
    dbsFile['task'] = task

    for run in jobReportFile["runs"]:
        newRun = Run(runNumber=run.run)
        newRun.extend(run.lumis)
        dbsFile.addRun(newRun)

    dbsFile.setLocation(pnn=list(jobReportFile["locations"])[0], immediateSave=False)

    self.dbsFilesToCreate.append(dbsFile)
    return
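# --- Illustrative sketch (not part of the original source) ---
# addFileToDBS() pulls the following keys out of the framework job report
# file entry; anything missing is defaulted via .get().  The values shown
# are placeholders that only document the expected shape.  Note that each
# element of the "runs" collection is an object exposing .run and .lumis,
# unlike the plain dictionaries used elsewhere in this section, so it is
# omitted from this literal.
exampleJobReportFile = {
    "lfn": "/store/unmerged/example/output.root",
    "size": 987654321,
    "events": 2500,
    "checksums": {"adler32": "deadbeef", "cksum": "12345"},
    "module_label": "RECOoutput",
    "configURL": None,             # optional
    "validStatus": "VALID",        # optional
    "processingVer": 1,            # optional
    "acquisitionEra": "Run2012A",  # optional
    "globalTag": "ExampleGT::All", # optional
    "prep_id": None,               # optional, see the TODO above
    "locations": {"T1_Example_Disk"},
    "dataset": {
        "applicationName": "cmsRun",
        "applicationVersion": "CMSSW_5_3_X",
        "primaryDataset": "ExamplePrimary",
        "processedDataset": "Run2012A-PromptReco-v1",
        "dataTier": "RECO",
    },
}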
def loadDBSBufferFilesBulk(self, fileObjs):
    """
    _loadDBSBufferFilesBulk_

    Yes, this is a stupid place to put it.
    No, there's no better place.
    """
    myThread = threading.currentThread()

    dbsFiles = []

    existingTransaction = self.beginTransaction()

    binds = []
    for f in fileObjs:
        binds.append(f["id"])

    loadFiles = self.daoFactory(classname="DBSBufferFiles.LoadBulkFilesByID")
    results = loadFiles.execute(files=binds,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry["id"])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if "runInfo" in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile["runInfo"].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile["runInfo"][r])
                dbsfile.addRun(run)
            del dbsfile["runInfo"]
        if "parentLFNs" in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile["parentLFNs"]:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile["parents"].add(newFile)
            del dbsfile["parentLFNs"]

    self.commitTransaction(existingTransaction)

    return dbsFiles
def addFileToDBS(self, jobReportFile, task, errorDataset = False):
    """
    _addFileToDBS_

    Add a file that was output from a job to the DBS buffer.
    """
    datasetInfo = jobReportFile["dataset"]

    dbsFile = DBSBufferFile(lfn = jobReportFile["lfn"],
                            size = jobReportFile["size"],
                            events = jobReportFile["events"],
                            checksums = jobReportFile["checksums"],
                            status = "NOTUPLOADED")
    dbsFile.setAlgorithm(appName = datasetInfo["applicationName"],
                         appVer = datasetInfo["applicationVersion"],
                         appFam = jobReportFile["module_label"],
                         psetHash = "GIBBERISH",
                         configContent = jobReportFile.get('configURL'))

    if errorDataset:
        dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"] + "-Error",
                                              datasetInfo["processedDataset"],
                                              datasetInfo["dataTier"]))
    else:
        dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"],
                                              datasetInfo["processedDataset"],
                                              datasetInfo["dataTier"]))

    dbsFile.setValidStatus(validStatus = jobReportFile.get("validStatus", None))
    dbsFile.setProcessingVer(ver = jobReportFile.get('processingVer', None))
    dbsFile.setAcquisitionEra(era = jobReportFile.get('acquisitionEra', None))
    dbsFile.setGlobalTag(globalTag = jobReportFile.get('globalTag', None))
    # TODO need to find where to get the prep id
    dbsFile.setPrepID(prep_id = jobReportFile.get('prep_id', None))
    dbsFile['task'] = task

    for run in jobReportFile["runs"]:
        newRun = Run(runNumber = run.run)
        newRun.extend(run.lumis)
        dbsFile.addRun(newRun)

    dbsFile.setLocation(pnn = list(jobReportFile["locations"])[0], immediateSave = False)

    self.dbsFilesToCreate.append(dbsFile)
    return
def findUploadableFilesByDAS(self, das):
    """
    _findUploadableFilesByDAS_

    Find all the Dataset-Algo files available with uploadable files.
    """
    myThread = threading.currentThread()
    existingTransaction = self.beginTransaction()

    dbsFiles = []

    factory = DAOFactory(package = "WMComponent.DBSUpload.Database",
                         logger = myThread.logger,
                         dbinterface = myThread.dbi)
    findFiles = factory(classname = "LoadDBSFilesByDAS")
    results = findFiles.execute(das = das,
                                conn = self.getDBConn(),
                                transaction = self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber = r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn = lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    self.commitTransaction(existingTransaction)

    return dbsFiles
def findUploadableFilesByDAS(self, das):
    """
    _findUploadableFilesByDAS_

    Find all the Dataset-Algo files available with uploadable files.
    """
    myThread = threading.currentThread()
    existingTransaction = self.beginTransaction()

    dbsFiles = []

    factory = DAOFactory(package="WMComponent.DBSUpload.Database",
                         logger=myThread.logger,
                         dbinterface=myThread.dbi)
    findFiles = factory(classname="LoadDBSFilesByDAS")
    results = findFiles.execute(das=das,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    self.commitTransaction(existingTransaction)

    return dbsFiles
def loadFilesFromBlocks(self, blockID):
    """
    _loadFilesFromBlocks_

    Load the files from all active blocks
    """
    findFiles = self.factory(classname = "LoadFilesFromBlocks")

    myThread = threading.currentThread()
    existingTransaction = self.beginTransaction()

    dbsFiles = []

    results = findFiles.execute(blockID = blockID,
                                conn = self.getDBConn(),
                                transaction = self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber = r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn = lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    self.commitTransaction(existingTransaction)

    return dbsFiles
def loadFilesFromBlocks(self, blockID):
    """
    _loadFilesFromBlocks_

    Load the files from all active blocks
    """
    findFiles = self.factory(classname="LoadFilesFromBlocks")

    myThread = threading.currentThread()
    existingTransaction = self.beginTransaction()

    dbsFiles = []

    results = findFiles.execute(blockID=blockID,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    self.commitTransaction(existingTransaction)

    return dbsFiles
def findUploadableFilesByDAS(self, das):
    """
    _findUploadableFilesByDAS_

    Find all the Dataset-Algo files available with uploadable files.
    """
    myThread = threading.currentThread()
    existingTransaction = self.beginTransaction()

    dbsFiles = []

    findFiles = self.daoFactory(classname="LoadDBSFilesByDAS")
    results = findFiles.execute(das=das,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry["id"])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if "runInfo" in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile["runInfo"].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile["runInfo"][r])
                dbsfile.addRun(run)
            del dbsfile["runInfo"]
        if "parentLFNs" in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile["parentLFNs"]:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile["parents"].add(newFile)
            del dbsfile["parentLFNs"]

    self.commitTransaction(existingTransaction)

    return dbsFiles
def loadDBSBufferFilesBulk(self, fileObjs):
    """
    _loadDBSBufferFilesBulk_

    Yes, this is a stupid place to put it.
    No, there's no better place.
    """
    dbsFiles = []

    binds = []
    for f in fileObjs:
        binds.append(f["id"])

    loadFiles = self.daoFactory(classname="DBSBufferFiles.LoadBulkFilesByID")
    results = loadFiles.execute(files=binds, transaction=False)

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile:
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo']:
                run = Run(runNumber=r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile:
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    return dbsFiles
def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize=100):
    """
    _getChunkFiles_

    Retrieve a chunk of files from the given collection and task.
    """
    chunkFiles = []

    files = self._getFilesetInfo(collectionName, filesetName, chunkOffset, chunkSize)
    files = mergeFakeFiles(files)

    for fileInfo in files:
        newFile = File(lfn=fileInfo["lfn"], size=fileInfo["size"],
                       events=fileInfo["events"],
                       parents=set(fileInfo["parents"]),
                       locations=set(fileInfo["locations"]),
                       merged=fileInfo["merged"])
        for run in fileInfo["runs"]:
            newRun = Run(run["run_number"])
            newRun.extend(run["lumis"])
            newFile.addRun(newRun)
        chunkFiles.append(newFile)

    return chunkFiles
def loadFilesByBlock(self, blockname):
    """
    _loadFilesByBlock_

    Get all files associated with a block
    """
    dbsFiles = []

    existingTransaction = self.beginTransaction()

    findFiles = self.daoFactory(classname="LoadFilesByBlock")
    results = findFiles.execute(blockname=blockname,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry["id"])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if "runInfo" in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile["runInfo"].keys():
                run = Run(runNumber=r)
                run.extend(dbsfile["runInfo"][r])
                dbsfile.addRun(run)
            del dbsfile["runInfo"]
        if "parentLFNs" in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile["parentLFNs"]:
                newFile = DBSBufferFile(lfn=lfn)
                dbsfile["parents"].add(newFile)
            del dbsfile["parentLFNs"]

    self.commitTransaction(existingTransaction)

    return dbsFiles
def loadDBSBufferFilesBulk(self, fileObjs):
    """
    _loadDBSBufferFilesBulk_

    Yes, this is a stupid place to put it.
    No, there's no better place.
    """
    dbsFiles = []

    binds = []
    for f in fileObjs:
        binds.append(f["id"])

    loadFiles = self.daoFactory(classname = "DBSBufferFiles.LoadBulkFilesByID")
    results = loadFiles.execute(files = binds, transaction = False)

    for entry in results:
        # Add loaded information
        dbsfile = DBSBufferFile(id=entry['id'])
        dbsfile.update(entry)
        dbsFiles.append(dbsfile)

    for dbsfile in dbsFiles:
        if 'runInfo' in dbsfile.keys():
            # Then we have to replace it with a real run
            for r in dbsfile['runInfo'].keys():
                run = Run(runNumber = r)
                run.extend(dbsfile['runInfo'][r])
                dbsfile.addRun(run)
            del dbsfile['runInfo']
        if 'parentLFNs' in dbsfile.keys():
            # Then we have some parents
            for lfn in dbsfile['parentLFNs']:
                newFile = DBSBufferFile(lfn = lfn)
                dbsfile['parents'].add(newFile)
            del dbsfile['parentLFNs']

    return dbsFiles
def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize=100,
                  user="******", group="cmsdataops"):
    """
    _getChunkFiles_

    Retrieve a chunk of files from the given collection and task.
    """
    chunkFiles = []

    result = self.couchdb.loadView("ACDC", "owner_coll_fileset_files",
                                   {"startkey": [group, user, collectionName, filesetName],
                                    "endkey": [group, user, collectionName, filesetName, {}],
                                    "limit": chunkSize,
                                    "skip": chunkOffset,
                                    }, [])

    for row in result["rows"]:
        resultRow = row['value']
        newFile = File(lfn=resultRow["lfn"], size=resultRow["size"],
                       events=resultRow["events"],
                       parents=set(resultRow["parents"]),
                       locations=set(resultRow["locations"]),
                       merged=resultRow["merged"])
        for run in resultRow["runs"]:
            newRun = Run(run["run_number"])
            newRun.extend(run["lumis"])
            newFile.addRun(newRun)
        chunkFiles.append(newFile)

    return chunkFiles