def __init__(self, config):
    # queue url used in WorkQueueManager
    self.thisAgentUrl = "http://" + config.Agent.hostName + ":5984"
    self.globalBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    self.localBackend = WorkQueueBackend(config.WorkQueueManager.couchurl)
    self.dbsUtil = DBSBufferUtil()
    self.condorAPI = PyCondorAPI()

def __init__(self, config):
    """
    Initialise class members
    """
    logging.info("Running __init__ for DBS3 Uploader")
    BaseWorkerThread.__init__(self)
    self.config = config

    # This is slightly dangerous, but DBSUpload depends
    # on DBSInterface anyway
    self.dbsUrl = self.config.DBS3Upload.dbsUrl

    self.dbsUtil = DBSBufferUtil()

    myThread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)

    self.pool = []
    self.blocksToCheck = []
    self.workInput = None
    self.workResult = None
    self.nProc = getattr(self.config.DBS3Upload, 'nProcesses', 4)
    self.wait = getattr(self.config.DBS3Upload, 'dbsWaitTime', 2)
    self.nTries = getattr(self.config.DBS3Upload, 'dbsNTries', 300)
    self.physicsGroup = getattr(self.config.DBS3Upload, "physicsGroup", "NoGroup")
    self.datasetType = getattr(self.config.DBS3Upload, "datasetType", "PRODUCTION")
    self.primaryDatasetType = getattr(self.config.DBS3Upload, "primaryDatasetType", "mc")
    self.blockCount = 0
    self.dbsApi = DbsApi(url=self.dbsUrl)

    # List of blocks currently in processing
    self.queuedBlocks = []

    # Set up the pool of worker processes
    self.setupPool()

    # Setting up any cache objects
    self.blockCache = {}

    self.filesToUpdate = []

    self.produceCopy = getattr(self.config.DBS3Upload, 'copyBlock', False)
    self.copyPath = getattr(self.config.DBS3Upload, 'copyBlockPath',
                            '/data/mnorman/block.json')

    self.timeoutWaiver = 1

    return

def __init__(self, config, dbsconfig=None):
    """
    Initialise class members
    """
    logging.info("Running __init__ for DBS3 Uploader")
    #myThread = threading.currentThread()
    BaseWorkerThread.__init__(self)
    self.config = config

    # This is slightly dangerous, but DBSUpload depends
    # on DBSInterface anyway
    self.maxBlockFiles = self.config.DBSUpload.DBSBlockMaxFiles
    self.maxBlockTime = self.config.DBSUpload.DBSBlockMaxTime
    self.maxBlockSize = self.config.DBSUpload.DBSBlockMaxSize
    self.dbsUrl = self.config.DBSUpload.dbsUrl

    self.dbsUtil = DBSBufferUtil()

    self.pool = []
    self.input = None
    self.result = None
    self.nProc = getattr(self.config.DBSUpload, 'nProcesses', 4)
    self.wait = getattr(self.config.DBSUpload, 'dbsWaitTime', 1)
    self.nTries = getattr(self.config.DBSUpload, 'dbsNTries', 300)
    self.dbs3UploadOnly = getattr(self.config.DBSUpload, 'dbs3UploadOnly', False)
    self.physicsGroup = getattr(self.config.DBSUpload, 'physicsGroup', 'DBS3Test')
    self.blockCount = 0

    # List of blocks currently in processing
    self.queuedBlocks = []

    # Set up the pool of worker processes
    self.setupPool()

    # Setting up any cache objects
    self.blockCache = {}
    self.dasCache = {}

    self.filesToUpdate = []

    self.produceCopy = getattr(self.config.DBSUpload, 'copyBlock', False)
    self.copyPath = getattr(self.config.DBSUpload, 'copyBlockPath',
                            '/data/mnorman/block.json')

    return

def setup(self, parameters):
    """
    set db connection(couchdb, wmbs) to prepare to gather information
    """
    # set the connection to local queue
    if not hasattr(self.config, "Tier0Feeder"):
        self.localQueue = WorkQueueService(self.config.AnalyticsDataCollector.localQueueURL)

    # set the connection for local couchDB call
    self.localCouchDB = LocalCouchDBData(self.config.AnalyticsDataCollector.localCouchURL,
                                         self.config.JobStateMachine.summaryStatsDBName,
                                         self.summaryLevel)

    # interface to WMBS/BossAir db
    myThread = threading.currentThread()
    # set wmagent db data
    self.wmagentDB = WMAgentDBData(self.summaryLevel, myThread.dbi, myThread.logger)
    # set the connection for local couchDB call
    self.localSummaryCouchDB = WMStatsWriter(self.config.AnalyticsDataCollector.localWMStatsURL,
                                             appName="WMStatsAgent")

    # use local db for tier0
    if hasattr(self.config, "Tier0Feeder"):
        centralRequestCouchDBURL = self.config.AnalyticsDataCollector.localT0RequestDBURL
    else:
        centralRequestCouchDBURL = self.config.AnalyticsDataCollector.centralRequestDBURL

    self.centralRequestCouchDB = RequestDBWriter(centralRequestCouchDBURL,
                                                 couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
    self.centralWMStatsCouchDB = WMStatsWriter(self.config.AnalyticsDataCollector.centralWMStatsURL)
    #TODO: change the config to hold couch url
    self.localCouchServer = CouchMonitor(self.config.JobStateMachine.couchurl)

    self.dbsBufferUtil = DBSBufferUtil()

    if self.pluginName is not None:
        pluginFactory = WMFactory("plugins", "WMComponent.AnalyticsDataCollector.Plugins")
        self.plugin = pluginFactory.loadObject(classname=self.pluginName)

def setUp(self):
    """
    _setUp_

    Setup the database and logging connection. Create some DBSBuffer tables
    and fake data for testing
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "WMCore.WMBS"],
                            useDefault=False)

    myThread = threading.currentThread()
    self.dbsbufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                       logger=myThread.logger,
                                       dbinterface=myThread.dbi)
    self.wmbsFactory = DAOFactory(package="WMCore.WMBS",
                                  logger=myThread.logger,
                                  dbinterface=myThread.dbi)

    self.dbsUtil = DBSBufferUtil()

    # Create two test dbsbuffer workflows
    insertWorkflow = self.dbsbufferFactory(classname="InsertWorkflow")
    insertWorkflow.execute("Test1", "Task1", 0, 0, 0, 0)
    insertWorkflow.execute("Test2", "Task2", 0, 0, 0, 0)

    # Update one workflow to "completed" state
    updateWorkflow = self.dbsbufferFactory(classname="UpdateWorkflowsToCompleted")
    updateWorkflow.execute(["Test1"])

    # Create a test wmbs workflow
    testWorkflow = Workflow(spec="somespec.xml", owner="Erik", name="Test1", task="Task1")
    testWorkflow.create()

    # Create a test dbsbuffer file
    self.createTestFiles()

def __init__(self, config):
    """
    Initialise class members
    """
    logging.info("Running __init__ for DBS3 Uploader")
    BaseWorkerThread.__init__(self)
    self.config = config

    # This is slightly dangerous, but DBSUpload depends
    # on DBSInterface anyway
    self.dbsUrl = self.config.DBS3Upload.dbsUrl

    # Tier0 Agent don't need this
    if hasattr(self.config, "Tier0Feeder"):
        self.wmstatsServerSvc = None
    else:
        wmstatsSvcURL = self.config.General.centralWMStatsURL.replace("couchdb/wmstats",
                                                                      "wmstatsserver")
        self.wmstatsServerSvc = WMStatsServer(wmstatsSvcURL)

    self.dbsUtil = DBSBufferUtil()

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    self.updateBlocksDAO = daoFactory(classname="UpdateBlocks")
    self.updateFilesDAO = daoFactory(classname="UpdateFiles")
    self.createBlocksDAO = daoFactory(classname="CreateBlocks")
    self.setBlockFilesDAO = daoFactory(classname="SetBlockFiles")

    self.pool = []
    self.blocksToCheck = []
    self.workInput = None
    self.workResult = None
    self.nProc = getattr(self.config.DBS3Upload, 'nProcesses', 4)
    self.wait = getattr(self.config.DBS3Upload, 'dbsWaitTime', 2)
    self.nTries = getattr(self.config.DBS3Upload, 'dbsNTries', 300)
    self.physicsGroup = getattr(self.config.DBS3Upload, "physicsGroup", "NoGroup")
    self.datasetType = getattr(self.config.DBS3Upload, "datasetType", "PRODUCTION")
    self.primaryDatasetType = getattr(self.config.DBS3Upload, "primaryDatasetType", "mc")
    self.blockCount = 0
    self.dbsApi = DbsApi(url=self.dbsUrl)

    # List of blocks currently in processing
    self.queuedBlocks = []

    # Set up the pool of worker processes
    self.setupPool()

    # Setting up any cache objects
    self.blockCache = {}

    self.filesToUpdate = []

    self.produceCopy = getattr(self.config.DBS3Upload, 'dumpBlock', False)
    self.copyPath = os.path.join(getattr(self.config.DBS3Upload, 'componentDir', '/data/srv/'),
                                 'dbsuploader_block.json')

    self.timeoutWaiver = 1

    self.datasetParentageCache = {}

    return

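# A minimal sketch of the configuration attributes this __init__ reads,
# assuming WMCore's Configuration class; the URLs are placeholders and the
# values simply mirror the defaults used by the getattr calls above. This is
# illustrative, not a prescribed production setup.
from WMCore.Configuration import Configuration

config = Configuration()
config.section_("General")
config.General.centralWMStatsURL = "https://cmsweb.example/couchdb/wmstats"  # placeholder URL
config.component_("DBS3Upload")
config.DBS3Upload.dbsUrl = "https://cmsweb.example/dbs/prod/global/DBSWriter"  # placeholder URL
config.DBS3Upload.componentDir = "/data/srv/"
config.DBS3Upload.nProcesses = 4
config.DBS3Upload.dbsWaitTime = 2
config.DBS3Upload.dbsNTries = 300
config.DBS3Upload.physicsGroup = "NoGroup"
config.DBS3Upload.datasetType = "PRODUCTION"
config.DBS3Upload.primaryDatasetType = "mc"
config.DBS3Upload.dumpBlock = False
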
def testCloseSettingsPerWorkflow(self):
    """
    _testCloseSettingsPerWorkflow_

    Test the block closing mechanics in the DBS3 uploader,
    this uses a fake dbs api to avoid reliance on external services.
    """
    # Signal trapExit that we are a friend
    os.environ["DONT_TRAP_EXIT"] = "True"
    try:
        # Monkey patch the imports of DbsApi
        from WMComponent.DBS3Buffer import DBSUploadPoller as MockDBSUploadPoller
        MockDBSUploadPoller.DbsApi = MockDbsApi

        # Set the poller and the dbsUtil for verification
        myThread = threading.currentThread()
        (_, dbsFilePath) = mkstemp(dir=self.testDir)
        self.dbsUrl = dbsFilePath
        config = self.getConfig()
        dbsUploader = MockDBSUploadPoller.DBSUploadPoller(config=config)
        dbsUtil = DBSBufferUtil()

        # First test is event based limits and timeout with no new files.
        # Set the files and workflow
        acqEra = "TropicalSeason%s" % (int(time.time()))
        workflowName = 'TestWorkload%s' % (int(time.time()))
        taskPath = '/%s/TestProcessing' % workflowName
        self.injectWorkflow(workflowName, taskPath,
                            MaxWaitTime=2, MaxFiles=100, MaxEvents=150)
        self.createParentFiles(acqEra, nFiles=20,
                               workflowName=workflowName, taskPath=taskPath)

        # The algorithm needs to be run twice. On the first iteration it will
        # create all the blocks and upload one with less than 150 events.
        # On the second iteration the second block is uploaded.
        dbsUploader.algorithm()
        dbsUploader.checkBlocks()
        openBlocks = dbsUtil.findOpenBlocks()
        self.assertEqual(len(openBlocks), 1)

        globalFiles = myThread.dbi.processData("SELECT id FROM dbsbuffer_file WHERE status = 'InDBS'")[0].fetchall()
        notUploadedFiles = myThread.dbi.processData("SELECT id FROM dbsbuffer_file WHERE status = 'NOTUPLOADED'")[0].fetchall()
        self.assertEqual(len(globalFiles), 14)
        self.assertEqual(len(notUploadedFiles), 6)

        # Check the fake DBS for data
        fakeDBS = open(self.dbsUrl, 'r')
        fakeDBSInfo = json.load(fakeDBS)
        fakeDBS.close()
        self.assertEqual(len(fakeDBSInfo), 2)
        for block in fakeDBSInfo:
            self.assertTrue('block_events' not in block['block'])
            self.assertEqual(block['block']['file_count'], 7)
            self.assertEqual(block['block']['open_for_writing'], 0)
            self.assertTrue('close_settings' not in block)

        time.sleep(3)
        dbsUploader.algorithm()
        dbsUploader.checkBlocks()
        openBlocks = dbsUtil.findOpenBlocks()
        self.assertEqual(len(openBlocks), 0)
        fakeDBS = open(self.dbsUrl, 'r')
        fakeDBSInfo = json.load(fakeDBS)
        fakeDBS.close()
        self.assertEqual(len(fakeDBSInfo), 3)
        for block in fakeDBSInfo:
            if block['block']['file_count'] != 6:
                self.assertEqual(block['block']['file_count'], 7)
            self.assertTrue('block_events' not in block['block'])
            self.assertEqual(block['block']['open_for_writing'], 0)
            self.assertTrue('close_settings' not in block)

        # Now check the limit by size and timeout with new files
        acqEra = "TropicalSeason%s" % (int(time.time()))
        workflowName = 'TestWorkload%s' % (int(time.time()))
        taskPath = '/%s/TestProcessing' % workflowName
        self.injectWorkflow(workflowName, taskPath,
                            MaxWaitTime=2, MaxFiles=5, MaxEvents=200000000)
        self.createParentFiles(acqEra, nFiles=16,
                               workflowName=workflowName, taskPath=taskPath)
        dbsUploader.algorithm()
        dbsUploader.checkBlocks()
        openBlocks = dbsUtil.findOpenBlocks()
        self.assertEqual(len(openBlocks), 1)
        fakeDBS = open(self.dbsUrl, 'r')
        fakeDBSInfo = json.load(fakeDBS)
        fakeDBS.close()
        self.assertEqual(len(fakeDBSInfo), 6)
        for block in fakeDBSInfo:
            if acqEra in block['block']['block_name']:
                self.assertEqual(block['block']['file_count'], 5)
            self.assertTrue('block_events' not in block['block'])
            self.assertTrue('close_settings' not in block)
            self.assertEqual(block['block']['open_for_writing'], 0)

        # Put more files, they will go into the same block and then it will be closed
        # after timeout
        time.sleep(3)
        self.createParentFiles(acqEra, nFiles=3,
                               workflowName=workflowName, taskPath=taskPath)
        dbsUploader.algorithm()
        dbsUploader.checkBlocks()
        openBlocks = dbsUtil.findOpenBlocks()
        self.assertEqual(len(openBlocks), 0)
        fakeDBS = open(self.dbsUrl, 'r')
        fakeDBSInfo = json.load(fakeDBS)
        fakeDBS.close()
        self.assertEqual(len(fakeDBSInfo), 7)
        for block in fakeDBSInfo:
            if acqEra in block['block']['block_name']:
                if block['block']['file_count'] < 5:
                    self.assertEqual(block['block']['file_count'], 4)
                else:
                    self.assertEqual(block['block']['file_count'], 5)
            self.assertTrue('block_events' not in block['block'])
            self.assertEqual(block['block']['open_for_writing'], 0)
            self.assertTrue('close_settings' not in block)

        # Finally test size limits
        acqEra = "TropicalSeason%s" % (int(time.time()))
        workflowName = 'TestWorkload%s' % (int(time.time()))
        taskPath = '/%s/TestProcessing' % workflowName
        self.injectWorkflow(workflowName, taskPath,
                            MaxWaitTime=1, MaxFiles=500,
                            MaxEvents=200000000, MaxSize=2048)
        self.createParentFiles(acqEra, nFiles=7,
                               workflowName=workflowName, taskPath=taskPath)
        dbsUploader.algorithm()
        dbsUploader.checkBlocks()
        time.sleep(2)
        dbsUploader.algorithm()
        dbsUploader.checkBlocks()
        self.assertEqual(len(openBlocks), 0)
        fakeDBS = open(self.dbsUrl, 'r')
        fakeDBSInfo = json.load(fakeDBS)
        fakeDBS.close()
        self.assertEqual(len(fakeDBSInfo), 11)
        for block in fakeDBSInfo:
            if acqEra in block['block']['block_name']:
                if block['block']['file_count'] != 1:
                    self.assertEqual(block['block']['block_size'], 2048)
                    self.assertEqual(block['block']['file_count'], 2)
            self.assertTrue('block_events' not in block['block'])
            self.assertEqual(block['block']['open_for_writing'], 0)
            self.assertTrue('close_settings' not in block)
    except:
        self.fail("We failed at some point in the test")
    finally:
        # We don't trust anyone else with _exit
        del os.environ["DONT_TRAP_EXIT"]
    return

def testDualUpload(self):
    """
    _testDualUpload_

    Verify that the dual upload mode works correctly.
    """
    self.dbsApi = DbsApi(url=self.dbsUrl)
    config = self.getConfig()
    dbsUploader = DBSUploadPoller(config=config)
    dbsUtil = DBSBufferUtil()

    # First test verifies that uploader will poll and then not do anything
    # as the database is empty.
    dbsUploader.algorithm()

    acqEra = "Summer%s" % (int(time.time()))
    parentFiles = self.createParentFiles(acqEra)
    (moreParentFiles, childFiles) = \
        self.createFilesWithChildren(parentFiles, acqEra)

    allFiles = parentFiles + moreParentFiles
    allBlocks = []
    for i in range(4):
        DBSBufferDataset(parentFiles[0]["datasetPath"]).create()
        blockName = parentFiles[0]["datasetPath"] + "#" + makeUUID()
        dbsBlock = DBSBufferBlock(blockName, location="malpaquet", datasetpath=None)
        dbsBlock.status = "Open"
        dbsBlock.setDataset(parentFiles[0]["datasetPath"], 'data', 'VALID')
        dbsUtil.createBlocks([dbsBlock])
        for file in allFiles[i * 5: (i * 5) + 5]:
            dbsBlock.addFile(file, 'data', 'VALID')
            dbsUtil.setBlockFiles({"block": blockName, "filelfn": file["lfn"]})
        if i < 2:
            dbsBlock.status = "InDBS"
        dbsUtil.updateBlocks([dbsBlock])
        dbsUtil.updateFileStatus([dbsBlock], "InDBS")
        allBlocks.append(dbsBlock)

    DBSBufferDataset(childFiles[0]["datasetPath"]).create()
    blockName = childFiles[0]["datasetPath"] + "#" + makeUUID()
    dbsBlock = DBSBufferBlock(blockName, location="malpaquet", datasetpath=None)
    dbsBlock.status = "InDBS"
    dbsBlock.setDataset(childFiles[0]["datasetPath"], 'data', 'VALID')
    dbsUtil.createBlocks([dbsBlock])
    for file in childFiles:
        dbsBlock.addFile(file, 'data', 'VALID')
        dbsUtil.setBlockFiles({"block": blockName, "filelfn": file["lfn"]})

    dbsUtil.updateFileStatus([dbsBlock], "InDBS")

    dbsUploader.algorithm()
    time.sleep(5)
    dbsUploader.algorithm()
    time.sleep(5)

    self.verifyData(parentFiles[0]["datasetPath"], parentFiles)

    # Change the status of the rest of the parent blocks so we can upload
    # them and the children.
    for dbsBlock in allBlocks:
        dbsBlock.status = "InDBS"
        dbsUtil.updateBlocks([dbsBlock])

    dbsUploader.algorithm()
    time.sleep(5)

    self.verifyData(parentFiles[0]["datasetPath"], parentFiles + moreParentFiles)

    # Run the uploader one more time to upload the children.
    dbsUploader.algorithm()
    time.sleep(5)

    self.verifyData(childFiles[0]["datasetPath"], childFiles)
    return

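# A condensed sketch of the block bookkeeping flow exercised above
# (createBlocks, setBlockFiles and friends). It assumes an already-initialised
# WMAgent database thread, and the import paths are assumed from the
# WMComponent.DBS3Buffer package used throughout these snippets; treat it as
# illustrative, not as the uploader's actual logic.
import uuid

from WMComponent.DBS3Buffer.DBSBufferBlock import DBSBufferBlock
from WMComponent.DBS3Buffer.DBSBufferDataset import DBSBufferDataset
from WMComponent.DBS3Buffer.DBSBufferUtil import DBSBufferUtil


def registerOpenBlock(datasetPath, files, location="malpaquet"):
    """Create one open block for datasetPath and attach the given buffer files."""
    dbsUtil = DBSBufferUtil()

    # Make sure the dataset exists in the buffer before creating its block
    DBSBufferDataset(datasetPath).create()

    # str(uuid.uuid4()) stands in for the makeUUID() helper used above
    blockName = datasetPath + "#" + str(uuid.uuid4())
    dbsBlock = DBSBufferBlock(blockName, location=location, datasetpath=None)
    dbsBlock.status = "Open"
    dbsBlock.setDataset(datasetPath, 'data', 'VALID')
    dbsUtil.createBlocks([dbsBlock])

    # Attach each file to the block, both on the object and in the buffer tables
    for fileInfo in files:
        dbsBlock.addFile(fileInfo, 'data', 'VALID')
        dbsUtil.setBlockFiles({"block": blockName, "filelfn": fileInfo["lfn"]})

    return dbsBlock
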
def __init__(self):
    self.dbsUtil = DBSBufferUtil()
    self.condorAPI = PyCondorAPI()

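# Helpers built in constructors like the one above are typically used for
# read-side checks; findOpenBlocks() is the call the surrounding tests use to
# count blocks that are still open. A minimal sketch, assuming the WMAgent
# database thread is already initialised; the import path is assumed from the
# WMComponent.DBS3Buffer package.
from WMComponent.DBS3Buffer.DBSBufferUtil import DBSBufferUtil


def countOpenBlocks():
    """Return the number of DBSBuffer blocks still marked as open."""
    dbsUtil = DBSBufferUtil()
    return len(dbsUtil.findOpenBlocks())
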
def testBulkLoad(self):
    """
    _testBulkLoad_

    Can we load in bulk?
    """
    addToBuffer = DBSBufferUtil()

    testFileChildA = DBSBufferFile(lfn="/this/is/a/child/lfnA", size=1024, events=20)
    testFileChildA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                                appFam="RECO", psetHash="GIBBERISH",
                                configContent="MOREGIBBERISH")
    testFileChildA.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
    testFileChildB = DBSBufferFile(lfn="/this/is/a/child/lfnB", size=1024, events=20)
    testFileChildB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                                appFam="RECO", psetHash="GIBBERISH",
                                configContent="MOREGIBBERISH")
    testFileChildB.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
    testFileChildC = DBSBufferFile(lfn="/this/is/a/child/lfnC", size=1024, events=20)
    testFileChildC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                                appFam="RECO", psetHash="GIBBERISH",
                                configContent="MOREGIBBERISH")
    testFileChildC.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")

    testFileChildA.create()
    testFileChildB.create()
    testFileChildC.create()

    testFileChildA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    testFileChildB.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    testFileChildC.setLocation(["se1.fnal.gov", "se1.cern.ch"])

    runSet = set()
    runSet.add(Run(1, *[45]))
    runSet.add(Run(2, *[67, 68]))
    testFileChildA.addRunSet(runSet)
    testFileChildB.addRunSet(runSet)
    testFileChildC.addRunSet(runSet)
    testFileChildA.save()
    testFileChildB.save()
    testFileChildC.save()

    setCksumAction = self.daoFactory(classname="DBSBufferFiles.AddChecksumByLFN")
    binds = [{'lfn': "/this/is/a/child/lfnA", 'cktype': 'adler32', 'cksum': 201},
             {'lfn': "/this/is/a/child/lfnA", 'cktype': 'cksum', 'cksum': 101},
             {'lfn': "/this/is/a/child/lfnB", 'cktype': 'adler32', 'cksum': 201},
             {'lfn': "/this/is/a/child/lfnB", 'cktype': 'cksum', 'cksum': 101},
             {'lfn': "/this/is/a/child/lfnC", 'cktype': 'adler32', 'cksum': 201},
             {'lfn': "/this/is/a/child/lfnC", 'cktype': 'cksum', 'cksum': 101}]
    setCksumAction.execute(bulkList=binds)

    testFile = DBSBufferFile(lfn="/this/is/a/lfn", size=1024, events=10)
    testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                          appFam="RECO", psetHash="GIBBERISH",
                          configContent="MOREGIBBERISH")
    testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
    testFile.create()

    testFileChildA.addParents([testFile["lfn"]])
    testFileChildB.addParents([testFile["lfn"]])
    testFileChildC.addParents([testFile["lfn"]])

    binds = [{'id': testFileChildA.exists()},
             {'id': testFileChildB.exists()},
             {'id': testFileChildC.exists()}]

    listOfFiles = addToBuffer.loadDBSBufferFilesBulk(fileObjs=binds)
    # print listOfFiles

    compareList = ['locations', 'psetHash', 'configContent', 'appName',
                   'appVer', 'appFam', 'events', 'datasetPath', 'runs']

    for f in listOfFiles:
        self.assertTrue(f['lfn'] in ["/this/is/a/child/lfnA",
                                     "/this/is/a/child/lfnB",
                                     "/this/is/a/child/lfnC"],
                        "Unknown file in loaded results")
        self.assertEqual(f['checksums'], {'adler32': '201', 'cksum': '101'})
        for parent in f['parents']:
            self.assertEqual(parent['lfn'], testFile['lfn'])
        for key in compareList:
            self.assertEqual(f[key], testFileChildA[key])

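# testBulkLoad above drives loadDBSBufferFilesBulk() with a list of {'id': ...}
# dictionaries built from DBSBufferFile.exists(). A minimal sketch of that call,
# assuming the file ids are already known and the database thread is
# initialised; import path assumed as above.
from WMComponent.DBS3Buffer.DBSBufferUtil import DBSBufferUtil


def loadFilesByIds(fileIds):
    """Load DBSBuffer file objects in bulk from a list of database ids."""
    dbsUtil = DBSBufferUtil()
    binds = [{'id': fileId} for fileId in fileIds]
    return dbsUtil.loadDBSBufferFilesBulk(fileObjs=binds)
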
def testA_basicFunction(self):
    """
    _basicFunction_

    See if I can make the damn thing work.
    """
    myThread = threading.currentThread()

    config = self.getConfig()

    from WMComponent.DBS3Buffer.DBSUploadPoller import DBSUploadPoller
    dbsUploader = DBSUploadPoller(config=config)
    dbsUtil = DBSBufferUtil()
    from dbs.apis.dbsClient import DbsApi
    dbsApi = DbsApi(url=config.DBSUpload.dbsUrl)

    # This should do nothing
    # Just making sure we don't crash
    try:
        dbsUploader.algorithm()
    except:
        dbsUploader.close()
        raise

    name = "ThisIsATest%s" % (int(time.time()))
    tier = "RECO"
    nFiles = 12
    name = name.replace('-', '_')
    name = '%s-v0' % name
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = "/Cosmics/%s/%s" % (name, tier)

    try:
        dbsUploader.algorithm()
    except:
        dbsUploader.close()
        raise

    time.sleep(5)

    # Now look in DBS
    try:
        result = dbsApi.listDatasets(dataset=datasetPath, detail=True,
                                     dataset_access_type='PRODUCTION')
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['data_tier_name'], 'RECO')
        self.assertEqual(result[0]['processing_version'], 0)
        self.assertEqual(result[0]['acquisition_era_name'], name.split('-')[0])
        result = dbsApi.listFiles(dataset=datasetPath)
        self.assertEqual(len(result), 11)
    except:
        dbsUploader.close()
        raise

    # All the blocks except for the last one should
    # now be there
    result = myThread.dbi.processData("SELECT id FROM dbsbuffer_block")[0].fetchall()
    self.assertEqual(len(result), 12)

    # The last block should still be open
    self.assertEqual(len(dbsUtil.findOpenBlocks()), 1)

    try:
        dbsUploader.algorithm()
    except:
        raise
    finally:
        dbsUploader.close()

    # All files should now be available
    result = dbsApi.listFiles(dataset=datasetPath)
    self.assertEqual(len(result), 12)

    # The last block should now be closed
    self.assertEqual(len(dbsUtil.findOpenBlocks()), 0)

    result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
    for res in result:
        self.assertEqual(res.values()[0], 'InDBS')
    return