def testE_NoMigration(self):
    """
    _NoMigration_

    Test the DBSUpload system with no global migration
    """
    myThread = threading.currentThread()
    config = self.createConfig()
    self.injectWorkflow(MaxWaitTime=3)
    config.DBSInterface.doGlobalMigration = False
    config.DBSUpload.pollInterval = 4

    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = 12
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = '/%s/%s/%s' % (name, name, tier)

    # Load components that are necessary to check status
    factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
    dbinterface = factory.loadObject("UploadToDBS")

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)

    # In the first round we should create blocks for the first dataset
    # The child dataset should not be handled until the parent is uploaded
    testDBSUpload = DBSUploadPoller(config=config)
    testDBSUpload.algorithm()

    # First, see if there are any blocks
    # One in DBS, one not in DBS
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(len(result), 2)
    self.assertEqual(result, [('InGlobalDBS',), ('Open',)])

    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
    for r in result:
        self.assertEqual(r[0], 'GLOBAL')

    return
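# A minimal sketch of the branch testE_NoMigration exercises: when
# config.DBSInterface.doGlobalMigration is False, the poller marks a block's
# files GLOBAL straight away instead of waiting for a migration. The helper
# name below (markBlockFiles) is an illustrative assumption, not part of the
# production API; the real logic lives in DBSUploadPoller.uploadBlocks()
# further down.
def markBlockFiles(block, uploadToDBS, doMigration):
    """Close a block's files as GLOBAL or LOCAL depending on migration mode."""
    if block['open'] == 'InGlobalDBS' or not doMigration:
        # Already migrated, or migration disabled: everything counts as global
        uploadToDBS.closeBlockFiles(blockname=block['Name'], status='GLOBAL')
    else:
        uploadToDBS.closeBlockFiles(blockname=block['Name'], status='LOCAL')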
def testF_DBSUploadQueueSizeCheckForAlerts(self):
    """
    This test will not trigger a real alert being sent unless the methods
    used during DBSUploadPoller.algorithm() -> DBSUploadPoller.uploadBlocks()
    are mocked. As done here, it probably can't be deterministic, yet the
    feature shall be checked.
    """
    sizeLevelToTest = 1
    myThread = threading.currentThread()
    config = self.createConfig()
    # threshold / value to check
    config.DBSUpload.alertUploadQueueSize = sizeLevelToTest

    # without these files the uploadBlocks method returns immediately
    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = sizeLevelToTest + 1
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = '/%s/%s/%s' % (name, name, tier)

    # load components that are necessary to check status
    # (this seems necessary, else some previous tests started failing)
    factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
    dbinterface = factory.loadObject("UploadToDBS")

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)

    testDBSUpload = DBSUploadPoller(config)
    # this is finally where the action (alert) should be triggered from
    testDBSUpload.algorithm()
    return
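# For reference, the alert this test tries to provoke reduces to a simple
# threshold check on the number of files queued for upload. A standalone
# sketch (checkUploadQueueSize and the sendAlert callable are illustrative
# stand-ins; the real check sits inside DBSUploadPoller.uploadBlocks()):
def checkUploadQueueSize(queuedFileCount, alertUploadQueueSize, sendAlert):
    """Send a level-6 alert once the upload backlog reaches the threshold."""
    if alertUploadQueueSize and queuedFileCount >= int(alertUploadQueueSize):
        msg = ("DBS upload queue size (%s) exceeded configured threshold (%s)."
               % (queuedFileCount, alertUploadQueueSize))
        sendAlert(6, msg=msg)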
def testC_FailTest(self):
    """
    _FailTest_

    THIS TEST IS DANGEROUS!
    Figure out what happens when we trigger rollbacks
    """
    myThread = threading.currentThread()
    config = self.createConfig()
    config.DBSUpload.abortStepTwo = True

    originalOut = sys.stdout
    originalErr = sys.stderr

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)

    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = 12
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = '/%s/%s/%s' % (name, name, tier)

    testDBSUpload = DBSUploadPoller(config=config)
    try:
        testDBSUpload.algorithm()
    except Exception:
        pass

    # Aborting in step two should result in no results
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(len(result), 0)

    config.DBSUpload.abortStepTwo = False
    config.DBSUpload.abortStepThree = True
    testDBSUpload = DBSUploadPoller(config=config)
    try:
        testDBSUpload.algorithm()
    except Exception:
        pass

    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(result, [('Pending',), ('Open',)])
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
    for res in result:
        self.assertEqual(res[0], 'READY')

    config.DBSUpload.abortStepThree = False
    self.injectWorkflow(MaxWaitTime=300)
    testDBSUpload = DBSUploadPoller(config=config)
    testDBSUpload.algorithm()

    # After this, one block should have been uploaded, one should still be open
    # This is the result of the pending block updating, and the open block staying open
    result = myThread.dbi.processData("SELECT status, id FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(result, [('InGlobalDBS', 3), ('Open', 4)])

    # Check that one block got there
    result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0]['NumberOfFiles'], 10)
    self.assertEqual(result[0]['NumberOfEvents'], 200)
    self.assertEqual(result[0]['BlockSize'], 10240)

    # Check that ten files got there
    result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
    self.assertEqual(len(result), 10)

    myThread.dbi.processData("UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 1")
    testDBSUpload = DBSUploadPoller(config=config)
    time.sleep(3)
    testDBSUpload.algorithm()
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',)])

    result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
    self.assertEqual(len(result), 12)
    fileLFNs = [x['lfn'] for x in files]
    for lfn in fileLFNs:
        self.assertTrue(lfn in result)

    testDBSUpload.algorithm()
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('Open',)])

    time.sleep(5)
    testDBSUpload.algorithm()
    time.sleep(2)
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('InGlobalDBS',)])

    result = listDatasetFiles(apiRef=globeAPI,
                              datasetPath='/%s/%s_2/%s' % (name, name, tier))
    self.assertEqual(len(result), 1)

    sys.stdout = originalOut
    sys.stderr = originalErr
    return
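# The rollback behaviour this test probes follows the usual WMCore pattern:
# do the DBSBuffer writes inside one transaction and roll back on any error.
# A minimal sketch, assuming a transaction object with begin/commit/rollback
# as used throughout this module (insertBlocks is a hypothetical stand-in
# for the real setBlockStatus / setBlock.execute DAO calls):
def commitOrRollback(transaction, insertBlocks):
    """Run the block-insertion callable inside a single transaction."""
    transaction.begin()
    try:
        insertBlocks()          # the DBSBuffer inserts
        transaction.commit()
    except Exception:
        transaction.rollback()  # abortStepTwo lands here, leaving no blocks
        raise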
def testB_AlgoMigration(self):
    """
    _AlgoMigration_

    Test our ability to migrate multiple algos to global

    Do this by creating, mid-poll, two separate batches of files
    One with the same dataset but a different algo
    One with the same algo, but a different dataset
    See that they both get to global
    """
    #raise nose.SkipTest
    myThread = threading.currentThread()
    config = self.createConfig()
    self.injectWorkflow(MaxWaitTime=20)
    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = 12
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = '/%s/%s/%s' % (name, name, tier)

    # Load components that are necessary to check status
    factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
    dbinterface = factory.loadObject("UploadToDBS")

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)

    testDBSUpload = DBSUploadPoller(config=config)
    testDBSUpload.algorithm()

    # There should now be one block
    result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
    self.assertEqual(len(result), 1)

    # Okay, by now, the first migration should have gone through.
    # Now create a second batch of files with the same dataset
    # but a different algo.
    for i in range(0, nFiles):
        testFile = DBSBufferFile(lfn='%s-batch2-%i' % (name, i),
                                 size=1024,
                                 events=20,
                                 checksums={'cksum': 1},
                                 locations="malpaquet")
        testFile.setAlgorithm(appName="cmsRun",
                              appVer="CMSSW_3_1_1",
                              appFam=tier,
                              psetHash="GIBBERISH_PART2",
                              configContent=self.configURL)
        testFile.setDatasetPath(datasetPath)
        testFile.addRun(Run(1, *[46]))
        testFile.create()

    # Have to do things twice to get parents
    testDBSUpload.algorithm()
    testDBSUpload.algorithm()

    # There should now be two blocks
    result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
    self.assertEqual(len(result), 2)

    # Now create another batch of files with the original algo
    # But in a different dataset
    for i in range(0, nFiles):
        testFile = DBSBufferFile(lfn='%s-batch3-%i' % (name, i),
                                 size=1024,
                                 events=20,
                                 checksums={'cksum': 1},
                                 locations="malpaquet")
        testFile.setAlgorithm(appName=name,
                              appVer="CMSSW_3_1_1",
                              appFam=tier,
                              psetHash="GIBBERISH",
                              configContent=self.configURL)
        testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
        testFile.addRun(Run(1, *[46]))
        testFile.create()

    # Do it twice for parentage.
    testDBSUpload.algorithm()
    testDBSUpload.algorithm()

    # There should now be one block
    result = listBlocks(apiRef=globeAPI,
                        datasetPath='/%s/%s_3/%s' % (name, name, tier))
    self.assertEqual(len(result), 1)

    # Well, all the blocks got there, so we're done
    return
def testA_basicUploadTest(self):
    """
    _basicUploadTest_

    Do everything simply once
    Create dataset, algo, files, blocks,
    upload them, mark as done, finish them, migrate them
    Also check the timeout
    """
    myThread = threading.currentThread()
    config = self.createConfig()
    self.injectWorkflow(MaxWaitTime=3)
    config.DBSUpload.pollInterval = 4

    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = 12
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = '/%s/%s/%s' % (name, name, tier)

    # Load components that are necessary to check status
    factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
    dbinterface = factory.loadObject("UploadToDBS")

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)

    # In the first round we should create blocks for the first dataset
    # The child dataset should not be handled until the parent is uploaded
    testDBSUpload = DBSUploadPoller(config=config)
    testDBSUpload.algorithm()

    # First, see if there are any blocks
    # One in DBS, one not in DBS
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(len(result), 2)
    self.assertEqual(result, [('InGlobalDBS',), ('Open',)])

    # Check to see if datasets and algos are in local DBS
    result = listAlgorithms(apiRef=localAPI, patternExe=name)
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0]['ExecutableName'], name)
    result = listPrimaryDatasets(apiRef=localAPI, match=name)
    self.assertEqual(result, [name])
    result = listProcessedDatasets(apiRef=localAPI, primary=name, dataTier="*")

    # Then check and see that the closed block made it into local DBS
    affectedBlocks = listBlocks(apiRef=localAPI, datasetPath=datasetPath)
    if affectedBlocks[0]['OpenForWriting'] == '0':
        self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
        self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
        self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
    else:
        self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
        self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
        self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

    # Check to make sure all the files are in local
    result = listDatasetFiles(apiRef=localAPI, datasetPath=datasetPath)
    fileLFNs = [x['lfn'] for x in files]
    for lfn in fileLFNs:
        self.assertTrue(lfn in result)

    # Make sure the child files aren't there
    flag = False
    try:
        listDatasetFiles(apiRef=localAPI,
                         datasetPath='/%s/%s_2/%s' % (name, name, tier))
    except Exception:
        flag = True
    self.assertTrue(flag)

    # There should be one block in global
    # It should have ten files and be closed
    result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
    self.assertEqual(len(result), 1)
    for block in result:
        self.assertEqual(block['OpenForWriting'], '0')
        self.assertTrue(block['NumberOfFiles'] in [2, 10])

    # Okay, deep breath. First round done
    # In the second round, the second block of the parent fileset should transfer
    # Make sure that the timeout functions work
    time.sleep(10)
    testDBSUpload.algorithm()
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(len(result), 2)
    self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',)])

    # Check to make sure all the files are in global
    result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
    for lfn in fileLFNs:
        self.assertTrue(lfn in result)

    # Make sure the child files aren't there
    flag = False
    try:
        listDatasetFiles(apiRef=localAPI,
                         datasetPath='/%s/%s_2/%s' % (name, name, tier))
    except Exception:
        flag = True
    self.assertTrue(flag)

    # Third round
    # Both of the parent blocks should have transferred
    # So the child block should now transfer
    testDBSUpload.algorithm()
    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('Open',)])

    flag = False
    try:
        result = listDatasetFiles(apiRef=localAPI,
                                  datasetPath='/%s/%s_2/%s' % (name, name, tier))
    except Exception:
        flag = True
    self.assertFalse(flag)
    self.assertEqual(len(result), 1)
    return
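# The timeout that the sleeps above exercise is a plain wall-clock check:
# a block older than its configured maximum wait time gets flagged Pending,
# so the next polling pass uploads and closes it. A sketch of that check
# (the block dictionaries mirror the ones the poller below assembles):
import time

def flagTimedOutBlocks(blocks):
    """Mark blocks as Pending once they exceed their maximum open time."""
    for block in blocks:
        if time.time() - block['CreationDate'] > block['MaxCloseTime']:
            block['open'] = 'Pending'
    return blocks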
class DBSUploadPoller(BaseWorkerThread):
    """
    Handles poll-based DBSUpload
    """

    def __init__(self, config, dbsconfig=None):
        """
        Initialise class members
        """
        myThread = threading.currentThread()
        BaseWorkerThread.__init__(self)
        self.config = config

        # This is slightly dangerous, but DBSUpload depends
        # on DBSInterface anyway
        self.bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        self.uploadToDBS = factory.loadObject("UploadToDBS")

        addFactory = WMFactory("dbsBuffer", "WMComponent.DBSBuffer.Database.Interface")
        self.addToBuffer = addFactory.loadObject("AddToBuffer")

        # Set DBSInterface
        self.dbsInterface = DBSInterface(config=config)

        # Set DAOs
        self.setBlock = self.bufferFactory(classname="DBSBufferFiles.SetBlock")
        self.setStatus = self.bufferFactory(classname="DBSBufferFiles.SetStatus")

        # Set config parameters
        self.doMigration = getattr(self.config.DBSInterface, 'doGlobalMigration', True)

        if dbsconfig is None:
            self.dbsconfig = config

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName="DBSUpload")
        return

    def algorithm(self, parameters=None):
        """
        Runs over all available DBSBuffer filesets/algos
        Commits them using DBSInterface
        Then checks blocks for timeout
        """
        logging.debug("Running subscription / fileset matching algorithm")
        try:
            self.sortBlocks()
            self.uploadBlocks()
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled exception in DBSUploadPoller\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            raise DBSUploadPollerException(msg)
        return

    def terminate(self, params):
        """
        Do one more pass, then terminate
        """
        logging.debug("Terminating. Doing one more pass before we die")
        self.algorithm(params)
        return

    def sortBlocks(self):
        """
        _sortBlocks_

        Find new files to upload, sort them into blocks
        Save the blocks in DBSBuffer
        """
        myThread = threading.currentThread()

        # Grab all the Dataset-Algo combinations
        dasList = self.uploadToDBS.findUploadableDAS()
        logging.debug("Recovered %i DAS to upload" % len(dasList))

        for dasInfo in dasList:
            # Go one DAS at a time
            dasID = dasInfo['DAS_ID']
            logging.info("Processing DAS %i" % dasID)

            # Initial values
            readyBlocks = []
            fileLFNs = []

            # Get the dataset-algo information
            #algo = createAlgoFromInfo(info = dasInfo)
            dataset = createDatasetFromInfo(info=dasInfo)

            # Get the files for the DAS
            files = self.uploadToDBS.findUploadableFilesByDAS(das=dasID)
            if len(files) < 1:
                # Then we have no files for this DAS
                logging.debug("DAS %i has no available files. Continuing." % dasID)
                continue

            # Load the blocks for the DAS
            blocks = self.uploadToDBS.loadBlocksByDAS(das=dasID)
            logging.debug("Retrieved %i files and %i blocks from DB."
                          % (len(files), len(blocks)))

            # Sort the files and blocks by location
            locationDict = sortListByKey(files, 'locations')
            blockDict = sortListByKey(blocks, 'location')
            logging.debug("Active DAS file locations: %s" % locationDict.keys())
            logging.debug("Active Block file locations: %s" % blockDict.keys())

            try:
                # Sort files that are already in blocks
                # back into those blocks
                # pass by reference
                blockDict, locationDict = preassignBlocks(files=locationDict,
                                                          blocks=blockDict)

                # Now go over all the files
                for location in locationDict.keys():
                    # Split files into blocks
                    locFiles = locationDict.get(location, [])
                    locBlocks = blockDict.get(location, [])
                    locBlocks = self.splitFilesIntoBlocks(files=locFiles,
                                                          blocks=locBlocks,
                                                          dataset=dataset,
                                                          location=location)
                    readyBlocks.extend(locBlocks)
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while sorting files into blocks for DAS %i\n" % dasID
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("BlockDictionary: %s" % blockDict)
                logging.debug("FileDictionary: %s" % locationDict)
                raise DBSUploadPollerException(msg)

            # At this point, all blocks should be in readyBlocks
            # STEP TWO: Commit blocks to DBSBuffer
            fileLFNs = self.createBlocksInDBSBuffer(readyBlocks=readyBlocks)

            # Now we should have all the blocks in DBSBuffer
            # Time to set the status of the files
            lfnList = [x['lfn'] for x in files]
            self.setStatus.execute(lfns=lfnList, status="READY",
                                   conn=myThread.transaction.conn,
                                   transaction=myThread.transaction)

        # All files that were in NOTUPLOADED
        # And had uploaded parents
        # Should now be assigned to blocks in DBSBuffer, and in the READY status
        return

    def uploadBlocks(self):
        """
        _uploadBlocks_

        Load all OPEN blocks out of the database with all their necessary files
        Once we have the blocks, determine which ones are ready to be uploaded
        Also determine which ones are ready to be migrated
        Upload them
        """
        myThread = threading.currentThread()

        # Get the blocks
        # This should grab all Pending and Open blocks
        blockInfo = self.uploadToDBS.loadBlocks()
        blocks = []

        if len(blockInfo) < 1:
            # Then we have no block, and probably no files
            logging.info("No blocks in this iteration. Returning")
            return

        # Assemble the blocks
        for info in blockInfo:
            block = createBlock(datasetPath='blank', location='blank')
            block['id'] = info['id']
            block['das'] = info['das']
            block['Name'] = info['blockname']
            block['CreationDate'] = info['create_time']
            block['open'] = info['open']
            block['MaxCloseTime'] = info['block_close_max_wait_time']
            blocks.append(block)

        dasIDs = []
        for block in blocks:
            if block['das'] not in dasIDs:
                dasIDs.append(block['das'])

        dasAlgoDataset = {}
        dasAlgoInfo = self.uploadToDBS.loadDASInfoByID(ids=dasIDs)
        for dasInfo in dasAlgoInfo:
            algo = createAlgoFromInfo(info=dasInfo)
            dataset = createDatasetFromInfo(info=dasInfo)
            dasAlgoDataset[dasInfo['DAS_ID']] = {'dataset': dataset,
                                                 'algo': algo}

        # At this point we should have the dataset and algo information
        # The blocks
        # And the files
        # Time to sort the files into blocks

        # the counter / watcher of the alertUploadQueueSize to possibly send alerts
        alertUploadQueueSize = getattr(self.config.DBSUpload, "alertUploadQueueSize", None)
        alertUploadQueueSizeCounter = 0
        for block in blocks:
            files = self.uploadToDBS.loadFilesFromBlocks(blockID=block['id'])
            for f in files:
                if f['blockID'] == block['id']:
                    # Put file in this block
                    logging.debug("Setting file %s to block %s" % (f['lfn'], block['Name']))
                    block['newFiles'].append(f)
                    alertUploadQueueSizeCounter += 1

        # check alertUploadQueueSize threshold (alert condition)
        if alertUploadQueueSize:
            if alertUploadQueueSizeCounter >= int(alertUploadQueueSize):
                msg = ("DBS upload queue size (%s) exceeded configured "
                       "threshold (%s)." % (alertUploadQueueSizeCounter,
                                            alertUploadQueueSize))
                self.sendAlert(6, msg=msg)

        # Check for block timeout
        for block in blocks:
            if time.time() - block['CreationDate'] > block['MaxCloseTime']:
                logging.info("Setting status to Pending due to timeout for block %s"
                             % block['Name'])
                block['open'] = 'Pending'

        # Should have files in blocks, now assign them to DAS
        for dasID in dasAlgoDataset.keys():
            readyBlocks = []
            dataset = dasAlgoDataset[dasID]['dataset']
            algo = dasAlgoDataset[dasID]['algo']
            for block in blocks:
                if len(block['newFiles']) > 0:
                    # Assign a location from the files
                    logging.debug("Block %s has %i files"
                                  % (block['Name'], len(block['newFiles'])))
                    block['location'] = list(block['newFiles'][0]['locations'])[0]
                if block['das'] == dasID:
                    if block['open'] == 'Pending':
                        # Always attach pending blocks
                        logging.debug("Attaching block %s" % block['Name'])
                        readyBlocks.append(block)
                    elif len(block['newFiles']) > 0:
                        # Else you only deal with blocks if they have new files
                        logging.debug("Attaching block %s" % block['Name'])
                        readyBlocks.append(block)

            if len(readyBlocks) < 1:
                # Nothing to do
                logging.debug("Nothing to do for DAS %i in uploadBlocks" % dasID)
                continue

            try:
                # Now do the real action of transferring crap
                # Damn it Anzar: Why does DBS print stuff out?
                for singleBlock in readyBlocks:
                    originalOut = sys.stdout
                    originalErr = sys.stderr
                    sys.stdout = open(os.devnull, 'w')
                    sys.stderr = open(os.devnull, 'w')

                    if getattr(self.config.DBSUpload, 'abortStepThree', False):
                        # Blow the stack for testing purposes
                        raise DBSUploadPollerException('None')

                    logging.info("About to upload to DBS for DAS %i with %i blocks"
                                 % (dasID, len(readyBlocks)))
                    affBlocks = self.dbsInterface.runDBSBuffer(algo=algo,
                                                               dataset=dataset,
                                                               blocks=[singleBlock])
                    sys.stdout = originalOut
                    sys.stderr = originalErr

                    # Update DBSBuffer with current information
                    myThread.transaction.begin()

                    for block in affBlocks:
                        logging.info("Successfully inserted %i files for block %s."
                                     % (len(block['insertedFiles']), block['Name']))
                        self.uploadToDBS.setBlockStatus(block=block['Name'],
                                                        locations=[block['location']],
                                                        openStatus=block['open'])
                        if block['open'] == 'InGlobalDBS' or not self.doMigration:
                            # Set block files as in global if they've been migrated.
                            # If we aren't doing global migrations, all files are in global
                            logging.debug("Block %s now listed in global DBS" % block['Name'])
                            self.uploadToDBS.closeBlockFiles(blockname=block['Name'],
                                                             status='GLOBAL')
                        else:
                            logging.debug("Block %s now uploaded to local DBS" % block['Name'])
                            self.uploadToDBS.closeBlockFiles(blockname=block['Name'],
                                                             status='LOCAL')

                    logging.debug("About to do post-upload DBS commit for DAS %i" % dasID)
                    myThread.transaction.commit()

            # New plan: If we get an error in trying to commit a block to DBS
            # then we just rollback the transaction and continue to the next
            # block - ignoring the exception
            except WMException:
                if getattr(myThread, 'transaction', None) is not None:
                    myThread.transaction.rollbackForError()
                #raise
            except Exception as ex:
                msg = 'Error in committing files to DBS\n'
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                if getattr(myThread, 'transaction', None) is not None:
                    myThread.transaction.rollbackForError()
                #raise DBSUploadPollerException(msg)

        return

    def splitFilesIntoBlocks(self, files, blocks, dataset, location):
        """
        Break the files into blocks based on config params
        Create a new block when necessary.
        """
        blocksToHandle = []

        if len(blocks) > 1:
            # Well, then we have a bit of a problem
            # Decide what to do about this later
            logging.error("More than one open block for this DAS")
        if len(blocks) == 0:
            currentBlock = createBlock(datasetPath=dataset['Path'],
                                       location=location)
        else:
            currentBlock = blocks[0]

        for newFile in files:
            # Check to see if blocks are full
            if not self.isBlockOpen(block=currentBlock):
                # Add old block to return list
                # Create a new block
                currentBlock['open'] = 'Pending'
                blocksToHandle.append(currentBlock)
                currentBlock = createBlock(datasetPath=dataset['Path'],
                                           location=location)

            # Check if the block has the closing settings,
            # otherwise plug in the settings from the newFile
            if currentBlock['MaxCloseTime'] is None or currentBlock['MaxCloseFiles'] is None \
                    or currentBlock['MaxCloseSize'] is None or currentBlock['MaxCloseEvents'] is None:
                currentBlock['MaxCloseTime'] = newFile['block_close_max_wait_time']
                currentBlock['MaxCloseEvents'] = newFile['block_close_max_events']
                currentBlock['MaxCloseFiles'] = newFile['block_close_max_files']
                currentBlock['MaxCloseSize'] = newFile['block_close_max_size']

            # Now process the file
            currentBlock['newFiles'].append(newFile)
            currentBlock['BlockSize'] += newFile['size']
            currentBlock['NumberOfFiles'] += 1
            currentBlock['NumberOfEvents'] += newFile['events']

        if currentBlock['NumberOfFiles'] > 0:
            blocksToHandle.append(currentBlock)

        return blocksToHandle

    def isBlockOpen(self, block):
        """
        _isBlockOpen_

        Tells you if the block should be closed
        """
        if block['MaxCloseTime'] is None or block['MaxCloseFiles'] is None \
                or block['MaxCloseSize'] is None or block['MaxCloseEvents'] is None:
            return True

        if time.time() - int(block.get('CreationDate', 0)) >= block['MaxCloseTime']:
            # We've timed out on this block
            return False
        if block['NumberOfFiles'] >= block['MaxCloseFiles']:
            # We've got too many files
            return False
        if float(block.get('BlockSize')) >= block['MaxCloseSize']:
            # Block is too big
            return False
        if block['NumberOfEvents'] >= block['MaxCloseEvents']:
            # Too many events in the block
            return False

        return True

    def createBlocksInDBSBuffer(self, readyBlocks):
        """
        _createBlocksInDBSBuffer_

        Create the blocks in the local database in their initial states.
        """
        myThread = threading.currentThread()
        fileLFNs = []
        try:
            # Do this in its own transaction
            myThread.transaction.begin()

            for block in readyBlocks:
                # First insert each block
                logging.info("Prepping block %s for DBS with status %s"
                             % (block['Name'], block['open']))
                self.uploadToDBS.setBlockStatus(block=block['Name'],
                                                locations=[block['location']],
                                                openStatus=block['open'],
                                                time=int(block['CreationDate']))

                # Then insert files from each block
                blockFileList = []
                for f in block.get('newFiles', []):
                    blockFileList.append(f['lfn'])

                if len(blockFileList) > 0:
                    self.setBlock.execute(lfn=blockFileList,
                                          blockName=block['Name'],
                                          conn=myThread.transaction.conn,
                                          transaction=myThread.transaction)
                    fileLFNs.extend(blockFileList)

            if getattr(self.config.DBSUpload, 'abortStepTwo', False):
                # Blow the stack for testing purposes
                raise DBSUploadPollerException('None')

            logging.debug("Committing transaction at the end of DBSBuffer insertion.")
            myThread.transaction.commit()

        except WMException:
            if getattr(myThread, 'transaction', None) is not None:
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            msg = 'Error in committing blocks to DBSBuffer\n'
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            if getattr(myThread, 'transaction', None) is not None:
                myThread.transaction.rollback()
            raise DBSUploadPollerException(msg)

        return fileLFNs
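# A minimal sketch of how the poller is driven in the tests above: build a
# component configuration, construct the poller, and invoke one polling pass
# by hand. The config attribute names follow the ones used throughout this
# file; note that DBSUploadPoller.__init__ also expects the calling thread
# to carry myThread.dbi and myThread.logger (the test harness's
# createConfig()/setUp take care of that), which is assumed here.
def runOnePass(config):
    """Construct the poller and run a single sort/upload cycle."""
    poller = DBSUploadPoller(config=config)
    poller.algorithm()  # one pass: sortBlocks() then uploadBlocks()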