def wmbsSubscriptionStatus(logger, dbi, conn, transaction):
    """Function to return status of wmbs subscriptions
    """
    action = DAOFactory(package='WMBS', logger=logger,
                        dbinterface=dbi)('Monitoring.SubscriptionStatus')
    return action.execute(conn=conn, transaction=transaction)
def availableScheddSlots(dbi, logger=logging, condorFraction=1):
    """
    Check the number of executing jobs and compare it with the condor limit.
    Return the difference: (condor limit * condorFraction) - executing jobs
    """
    action = DAOFactory(package='WMCore.WMBS', logger=logger,
                        dbinterface=dbi)(classname="Jobs.GetCountByState")
    executingJobs = int(action.execute("executing"))

    maxScheddJobs = getScheddParamValue("MAX_JOBS_PER_OWNER")
    if maxScheddJobs is None:
        logger.warning("Failed to retrieve 'MAX_JOBS_PER_OWNER' from HTCondor")
        return 0

    freeSubmitSlots = int(int(maxScheddJobs) * condorFraction - executingJobs)
    return freeSubmitSlots
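A minimal usage sketch of the slot check above (not part of the original module): it assumes an initialized WMAgent thread providing `myThread.dbi`, and the submitter helper name is hypothetical.

# Hedged example: throttle a hypothetical submission loop with availableScheddSlots().
# Assumes `myThread.dbi` is an initialized WMAgent database interface.
import logging
import threading

def submitIfRoomLeft(jobsToSubmit, condorFraction=0.75):
    myThread = threading.currentThread()
    freeSlots = availableScheddSlots(dbi=myThread.dbi, logger=logging,
                                     condorFraction=condorFraction)
    if freeSlots <= 0:
        logging.info("Schedd is full, postponing submission of %d jobs", len(jobsToSubmit))
        return []
    # submit at most as many jobs as there are free slots
    return jobsToSubmit[:freeSlots]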
def algorithm(self, groupInstance=None, jobInstance=None, *args, **kwargs):
    """
    _algorithm_

    A file based splitting algorithm
    """
    # extract some global scheduling parameters
    self.jobNamePrefix = kwargs.get('jobNamePrefix', "RepackMerge")
    self.minInputSize = kwargs['minInputSize']
    self.maxInputSize = kwargs['maxInputSize']
    self.maxInputEvents = kwargs['maxInputEvents']
    self.maxInputFiles = kwargs['maxInputFiles']
    self.maxEdmSize = kwargs['maxEdmSize']
    self.maxOverSize = kwargs['maxOverSize']
    self.maxLatency = kwargs['maxLatency']

    # catch configuration errors
    if self.maxOverSize > self.maxEdmSize:
        self.maxOverSize = self.maxEdmSize

    self.currentTime = time.time()
    self.createdGroup = False

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi)

    # data discovery
    getAvailableFilesDAO = daoFactory(classname="Subscriptions.GetAvailableRepackMergeFiles")
    availableFiles = getAvailableFilesDAO.execute(self.subscription["id"])

    # nothing to do, stop immediately
    if len(availableFiles) == 0:
        return

    # data discovery for already used lumis
    getUsedLumisDAO = daoFactory(classname="Subscriptions.GetUsedLumis")
    usedLumis = getUsedLumisDAO.execute(self.subscription["id"], True)

    # empty lumis (as declared by StorageManager) are treated the
    # same way as used lumis, ie. we process around them
    getEmptyLumisDAO = daoFactory(classname="Subscriptions.GetLumiHolesForRepackMerge")
    usedLumis |= getEmptyLumisDAO.execute(self.subscription["id"])

    # sort available files by lumi
    availableFileLumiDict = {}
    for result in availableFiles:
        for lumi in range(result['first_lumi'], 1 + result['last_lumi']):
            if lumi not in availableFileLumiDict:
                availableFileLumiDict[lumi] = []
            if lumi == result['first_lumi']:
                availableFileLumiDict[lumi].append(result)

    # loop through lumis in order
    haveLumiHole = False
    filesByLumi = {}
    maxUsedLumi = max(usedLumis) if usedLumis else 0
    for lumi in range(1, 1 + max(maxUsedLumi, max(availableFileLumiDict.keys()))):

        # lumi contains data => remember it for potential processing
        if lumi in availableFileLumiDict:
            filesByLumi[lumi] = availableFileLumiDict[lumi]

        # lumi is used and we have data => trigger processing
        elif lumi in usedLumis:
            if len(filesByLumi) > 0:
                if haveLumiHole:
                    # if lumi hole check for maxLatency first
                    if self.getDataAge(filesByLumi) > self.maxLatency:
                        self.defineJobs(filesByLumi, True)
                        filesByLumi = {}
                    # if maxLatency not met ignore data for now
                    else:
                        filesByLumi = {}
                else:
                    self.defineJobs(filesByLumi, True)
                    filesByLumi = {}

            # if we had a lumi hole it is now not relevant anymore
            # the next data will have a used lumi in front of it
            haveLumiHole = False

        # lumi has no data and isn't used, ie. we have a lumi hole
        # also has an impact on how to handle later data
        else:
            if len(filesByLumi) > 0:
                # forceClose if maxLatency trigger is met
                if self.getDataAge(filesByLumi) > self.maxLatency:
                    self.defineJobs(filesByLumi, True)
                    filesByLumi = {}
                # follow the normal thresholds, but only if
                # there is no lumi hole in front of the data
                elif not haveLumiHole:
                    self.defineJobs(filesByLumi, False)
                    filesByLumi = {}
                # otherwise ignore the data for now
                else:
                    filesByLumi = {}
            haveLumiHole = True

    # now handle whatever data is still left (at the high end of the lumi range)
    if haveLumiHole:
        if self.getDataAge(filesByLumi) > self.maxLatency:
            self.defineJobs(filesByLumi, True)
    else:
        fileset = self.subscription.getFileset()
        fileset.load()
        self.defineJobs(filesByLumi, not fileset.open)

    return
def __init__(self, config):
    BaseWorkerThread.__init__(self)
    myThread = threading.currentThread()

    # DAO factory for WMBS objects
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=logging,
                                 dbinterface=myThread.dbi)
    self.config = config

    # Libraries
    self.resourceControl = ResourceControl()
    self.changeState = ChangeState(self.config)
    self.repollCount = getattr(self.config.JobSubmitter, 'repollCount', 10000)

    # BossAir
    self.bossAir = BossAirAPI(config=self.config)

    # Additions for caching-based JobSubmitter
    self.workflowTimestamps = {}
    self.workflowPrios = {}
    self.cachedJobIDs = set()
    self.cachedJobs = {}
    self.jobDataCache = {}
    self.jobsToPackage = {}
    self.sandboxPackage = {}
    self.siteKeys = {}
    self.locationDict = {}
    self.cmsNames = {}
    self.drainSites = set()
    self.abortSites = set()
    self.sortedSites = []
    self.packageSize = getattr(self.config.JobSubmitter, 'packageSize', 500)
    self.collSize = getattr(self.config.JobSubmitter, 'collectionSize',
                            self.packageSize * 1000)

    # initialize the alert framework (if available)
    self.initAlerts(compName="JobSubmitter")

    try:
        if not getattr(self.config.JobSubmitter, 'submitDir', None):
            self.config.JobSubmitter.submitDir = self.config.JobSubmitter.componentDir
        self.packageDir = os.path.join(self.config.JobSubmitter.submitDir, 'packages')

        if not os.path.exists(self.packageDir):
            os.makedirs(self.packageDir)
    except Exception as ex:
        msg = "Error while trying to create packageDir!\n"
        msg += str(ex)
        logging.error(msg)
        self.sendAlert(6, msg=msg)
        try:
            logging.debug("PackageDir: %s" % self.packageDir)
            logging.debug("Config: %s" % config)
        except Exception:
            pass
        raise JobSubmitterPollerException(msg)
def setUp(self):
    """
    _setUp_

    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi)
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS", logger=logging, dbinterface=myThread.dbi)

    myThread.dbi.processData("""INSERT INTO wmbs_location
                                (id, site_name, state)
                                VALUES (1, 'SomeSite', 1)
                                """, transaction=False)
    myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                (location, pnn)
                                VALUES (1, 'SomePNN')
                                """, transaction=False)
    myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                (location, pnn)
                                VALUES (1, 'SomePNN2')
                                """, transaction=False)

    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertRunDAO.execute(binds={'RUN': 1, 'HLTKEY': "someHLTKey"}, transaction=False)

    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False)
    insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 2}, transaction=False)
    insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 3}, transaction=False)
    insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 4}, transaction=False)
    insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 5}, transaction=False)

    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

    insertCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
    insertCMSSWVersionDAO.execute(binds={'VERSION': "CMSSW_4_2_7"}, transaction=False)

    insertStreamCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertStreamCMSSWVersion")
    insertStreamCMSSWVersionDAO.execute(binds={'RUN': 1, 'STREAM': 'A', 'VERSION': "CMSSW_4_2_7"},
                                        transaction=False)

    insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
    insertStreamerDAO.execute(binds={'RUN': 1, 'LUMI': 4, 'STREAM': "A",
                                     'LFN': "/testLFN/A", 'FILESIZE': 100,
                                     'EVENTS': 100, 'TIME': int(time.time())},
                              transaction=False)

    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

    self.fileset1 = Fileset(name="TestFileset1")
    self.fileset2 = Fileset(name="TestFileset2")
    self.fileset1.load()
    self.fileset2.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
    workflow2 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow2", task="Test")
    workflow1.create()
    workflow2.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Repack",
                                      type="Repack")
    self.subscription2 = Subscription(fileset=self.fileset2,
                                      workflow=workflow2,
                                      split_algo="RepackMerge",
                                      type="RepackMerge")
    self.subscription1.create()
    self.subscription2.create()

    myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                VALUES (%d, 'SOMEOUTPUT', %d)
                                """ % (workflow1.id, self.fileset2.id),
                             transaction=False)

    # keep for later
    self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")
    self.insertClosedLumiDAO = daoFactory(classname="RunLumiCloseout.InsertClosedLumi")
    self.feedStreamersDAO = daoFactory(classname="Tier0Feeder.FeedStreamers")
    self.acquireFilesDAO = wmbsDaoFactory(classname="Subscriptions.AcquireFiles")
    self.completeFilesDAO = wmbsDaoFactory(classname="Subscriptions.CompleteFiles")

    self.currentTime = int(time.time())

    # default split parameters
    self.splitArgs = {}
    self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
    self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
    self.splitArgs['maxInputEvents'] = 100000000
    self.splitArgs['maxInputFiles'] = 1000
    self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
    self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

    return
def __init__(self, config):
    self.config = config
    BasePlugin.__init__(self, config)

    self.locationDict = {}

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger,
                            dbinterface=myThread.dbi)
    self.locationAction = daoFactory(classname="Locations.GetSiteInfo")

    self.packageDir = None

    if os.path.exists(os.path.join(getWMBASE(), 'src/python/WMCore/WMRuntime/Unpacker.py')):
        self.unpacker = os.path.join(getWMBASE(), 'src/python/WMCore/WMRuntime/Unpacker.py')
    else:
        self.unpacker = os.path.join(getWMBASE(), 'WMCore/WMRuntime/Unpacker.py')

    self.agent = getattr(config.Agent, 'agentName', 'WMAgent')
    self.sandbox = None
    self.scriptFile = None
    self.submitDir = None
    self.removeTime = getattr(config.BossAir, 'removeTime', 60)
    self.useGSite = getattr(config.BossAir, 'useGLIDEINSites', False)
    self.submitWMSMode = getattr(config.BossAir, 'submitWMSMode', False)
    self.errorThreshold = getattr(config.BossAir, 'submitErrorThreshold', 10)
    self.errorCount = 0
    self.defaultTaskPriority = getattr(config.BossAir, 'defaultTaskPriority', 0)
    self.maxTaskPriority = getattr(config.BossAir, 'maxTaskPriority', 1e7)

    # Required for global pool accounting
    self.acctGroup = getattr(config.BossAir, 'acctGroup', "production")
    self.acctGroupUser = getattr(config.BossAir, 'acctGroupUser', "cmsdataops")

    # Build ourselves a pool
    self.pool = []
    self.input = None
    self.result = None
    self.nProcess = getattr(self.config.BossAir, 'nCondorProcesses', 4)

    # Set up my proxy and glexec stuff
    self.setupScript = getattr(config.BossAir, 'UISetupScript', None)
    self.proxy = None
    self.serverCert = getattr(config.BossAir, 'delegatedServerCert', None)
    self.serverKey = getattr(config.BossAir, 'delegatedServerKey', None)
    self.myproxySrv = getattr(config.BossAir, 'myproxyServer', None)
    self.proxyDir = getattr(config.BossAir, 'proxyDir', '/tmp/')
    self.serverHash = getattr(config.BossAir, 'delegatedServerHash', None)
    self.glexecPath = getattr(config.BossAir, 'glexecPath', None)
    self.glexecWrapScript = getattr(config.BossAir, 'glexecWrapScript', None)
    self.glexecUnwrapScript = getattr(config.BossAir, 'glexecUnwrapScript', None)
    self.jdlProxyFile = None     # Proxy name to put in JDL (owned by submit user)
    self.glexecProxyFile = None  # Copy of same file owned by submit user

    if self.glexecPath:
        if not (self.myproxySrv and self.proxyDir):
            raise WMException('glexec requires myproxyServer and proxyDir to be set.')
    if self.myproxySrv:
        if not (self.serverCert and self.serverKey):
            raise WMException('MyProxy server requires serverCert and serverKey to be set.')

    # Make the directory for the proxies
    if self.proxyDir and not os.path.exists(self.proxyDir):
        logging.debug("proxyDir not found: creating it.")
        try:
            os.makedirs(self.proxyDir, 0o1777)
        except Exception as ex:
            msg = "Error: problem when creating proxyDir directory - '%s'" % str(ex)
            raise BossAirPluginException(msg)
    elif not os.path.isdir(self.proxyDir):
        msg = "Error: proxyDir '%s' is not a directory" % self.proxyDir
        raise BossAirPluginException(msg)

    if self.serverCert and self.serverKey and self.myproxySrv:
        self.proxy = self.setupMyProxy()

    # Build a request string
    self.reqStr = ("(Memory >= 1 && OpSys == \"LINUX\" ) "
                   "&& (Arch == \"INTEL\" || Arch == \"X86_64\") "
                   "&& stringListMember(GLIDEIN_CMSSite, DESIRED_Sites) "
                   "&& ((REQUIRED_OS==\"any\") || (GLIDEIN_REQUIRED_OS==REQUIRED_OS))")
    if hasattr(config.BossAir, 'condorRequirementsString'):
        self.reqStr = config.BossAir.condorRequirementsString

    return
def algorithm(self, groupInstance=None, jobInstance=None, *args, **kwargs):
    """
    _algorithm_

    A file based splitting algorithm
    """
    # extract some global scheduling parameters
    self.jobNamePrefix = kwargs.get('jobNamePrefix', "Repack")
    self.maxSizeSingleLumi = kwargs['maxSizeSingleLumi']
    self.maxSizeMultiLumi = kwargs['maxSizeMultiLumi']
    self.maxInputEvents = kwargs['maxInputEvents']
    self.maxInputFiles = kwargs['maxInputFiles']

    self.createdGroup = False

    timePerEvent, sizePerEvent, memoryRequirement = \
        self.getPerformanceParameters(kwargs.get('performance', {}))

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi)

    maxLumiWithJobDAO = daoFactory(classname="Subscriptions.MaxLumiWithJob")
    getClosedEmptyLumisDAO = daoFactory(classname="JobSplitting.GetClosedEmptyLumis")

    # keep for later
    self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")

    # data discovery
    getFilesDAO = daoFactory(classname="Subscriptions.GetAvailableRepackFiles")
    availableFiles = getFilesDAO.execute(self.subscription["id"])

    # nothing to do, stop immediately
    if len(availableFiles) == 0:
        return

    # lumis we have data for
    lumiList = set([])
    for result in availableFiles:
        lumiList.add(result['lumi'])
    lumiList = sorted(list(lumiList))

    # highest lumi with a job
    maxLumiWithJob = 0
    if lumiList[0] > 1:
        maxLumiWithJob = maxLumiWithJobDAO.execute(self.subscription["id"])

    # consistency check
    if lumiList[0] <= maxLumiWithJob:
        logging.error("ERROR: finding data that can't be there, bailing out...")
        return

    # do we have lumi holes ?
    detectEmptyLumis = False
    lumi = maxLumiWithJob + 1
    while lumi in lumiList:
        lumi += 1
    if lumi < lumiList[-1]:
        detectEmptyLumis = True

    # empty and closed lumis
    emptyLumis = []
    if detectEmptyLumis:
        emptyLumis = getClosedEmptyLumisDAO.execute(self.subscription["id"], maxLumiWithJob)

    # figure out lumi range to create jobs for
    streamersByLumi = {}
    firstLumi = maxLumiWithJob + 1
    lastLumi = lumiList[-1]
    for lumi in range(firstLumi, lastLumi + 1):
        if (lumi in lumiList) or (lumi in emptyLumis):
            streamersByLumi[lumi] = []
        else:
            break

    # figure out what data to create jobs for
    for fileInfo in availableFiles:
        lumi = fileInfo['lumi']
        if lumi in streamersByLumi:
            streamersByLumi[lumi].append(fileInfo)

    # check if fileset is closed
    fileset = self.subscription.getFileset()
    fileset.load()

    self.defineJobs(streamersByLumi, fileset.open, memoryRequirement)

    return
def stuffDatabase(self):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database.

    All files will be already in GLOBAL and in_phedex
    """
    myThread = threading.currentThread()

    buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
    insertWorkflow = buffer3Factory(classname="InsertWorkflow")
    insertWorkflow.execute("BogusRequestA", "BogusTask", 0, 0, 0, 0)
    insertWorkflow.execute("BogusRequestB", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    datasetAction = uploadFactory(classname="NewDataset")
    createAction = uploadFactory(classname="CreateBlocks")

    datasetAction.execute(datasetPath=self.testDatasetA)
    datasetAction.execute(datasetPath=self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBufferBlock(name=self.blockAName, location="srm-cms.cern.ch", datasetpath=None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBufferBlock(name=self.blockBName, location="srm-cms.cern.ch", datasetpath=None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks=[newBlockA, newBlockB])

    bufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)

    setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
    fileStatus.execute(testFileA["lfn"], "GLOBAL")
    fileStatus.execute(testFileB["lfn"], "GLOBAL")
    fileStatus.execute(testFileC["lfn"], "GLOBAL")
    fileStatus.execute(testFileD["lfn"], "GLOBAL")
    fileStatus.execute(testFileE["lfn"], "GLOBAL")

    phedexStatus = bufferFactory(classname="DBSBufferFiles.SetPhEDExStatus")
    phedexStatus.execute(testFileA["lfn"], 1)
    phedexStatus.execute(testFileB["lfn"], 1)
    phedexStatus.execute(testFileC["lfn"], 1)
    phedexStatus.execute(testFileD["lfn"], 1)
    phedexStatus.execute(testFileE["lfn"], 1)

    associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
    associateWorkflow.execute(testFileA["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequestA", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequestB", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequestB", "BogusTask")

    # Make the desired subscriptions
    insertSubAction = buffer3Factory(classname="NewSubscription")
    datasetA = DBSBufferDataset(path=self.testDatasetA)
    datasetB = DBSBufferDataset(path=self.testDatasetB)
    workload = WMWorkloadHelper()
    workload.load(os.path.join(getTestBase(),
                               'WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl'))
    insertSubAction.execute(datasetA.exists(),
                            workload.getSubscriptionInformation()[self.testDatasetA])
    insertSubAction.execute(datasetB.exists(),
                            workload.getSubscriptionInformation()[self.testDatasetB])

    return
def __init__(self, wmSpec, taskName, blockName=None, mask=None, cachepath='.'):
    """
    _init_

    Initialize DAOs and other things needed.
    """
    self.block = blockName
    self.mask = mask
    self.wmSpec = wmSpec
    self.topLevelTask = wmSpec.getTask(taskName)
    self.cachepath = cachepath
    self.isDBS = True

    self.topLevelFileset = None
    self.topLevelSubscription = None
    self.topLevelTaskDBSBufferId = None

    self.mergeOutputMapping = {}

    # Initiate the pieces you need to run your own DAOs
    WMConnectionBase.__init__(self, "WMCore.WMBS")
    myThread = threading.currentThread()
    self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)

    # DAOs from WMBS for file commit
    self.setParentage = self.daofactory(classname="Files.SetParentage")
    self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
    self.setFileLocation = self.daofactory(classname="Files.SetLocationForWorkQueue")
    self.setFileAddChecksum = self.daofactory(classname="Files.AddChecksumByLFN")
    self.addFileAction = self.daofactory(classname="Files.Add")
    self.addToFileset = self.daofactory(classname="Files.AddDupsToFileset")
    self.getLocations = self.daofactory(classname="Locations.ListSites")
    self.getLocationInfo = self.daofactory(classname="Locations.GetSiteInfo")

    # DAOs from DBSBuffer
    self.dbsCreateFiles = self.dbsDaoFactory(classname="DBSBufferFiles.Add")
    self.dbsSetLocation = self.dbsDaoFactory(classname="DBSBufferFiles.SetLocationByLFN")
    self.dbsInsertLocation = self.dbsDaoFactory(classname="DBSBufferFiles.AddLocation")
    self.dbsSetChecksum = self.dbsDaoFactory(classname="DBSBufferFiles.AddChecksumByLFN")
    self.dbsInsertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")

    # Added for file creation bookkeeping
    self.dbsFilesToCreate = []
    self.addedLocations = []
    self.wmbsFilesToCreate = []
    self.insertedBogusDataset = -1

    return
def injectNewData(dbInterfaceStorageManager, dbInterfaceHltConf, dbInterfaceSMNotify,
                  streamerPNN, minRun=None, maxRun=None, injectRun=None):
    """
    _injectNewData_

    Replaces the old-style file notification injection into the Tier0.

    Queries the StorageManager database for new data and injects it into the Tier0.

    These queries will find duplicates, ie. data that was already found and
    processed in a previous polling cycle. Code has to be robust against that.

    Needs to be passed the PNN on which streamer files are located
    """
    logging.debug("injectNewData()")

    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi)
    daoFactoryStorageManager = DAOFactory(package="T0.WMBS", logger=logging,
                                          dbinterface=dbInterfaceStorageManager)
    daoFactoryHltConf = DAOFactory(package="T0.WMBS", logger=logging,
                                   dbinterface=dbInterfaceHltConf)

    if dbInterfaceSMNotify:
        daoFactorySMNotify = DAOFactory(package="T0.WMBS", logger=logging,
                                        dbinterface=dbInterfaceSMNotify)
        insertFileStatusDAO = daoFactorySMNotify(classname="SMNotification.InsertOfflineFileStatus")

    getNewDataDAO = daoFactoryStorageManager(classname="StorageManager.GetNewData")
    getRunInfoDAO = daoFactoryHltConf(classname="StorageManager.GetRunInfo")
    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
    insertStreamCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertStreamCMSSWVersion")
    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")

    newData = getNewDataDAO.execute(minRun=minRun, maxRun=maxRun,
                                    injectRun=injectRun, transaction=False)

    # remove already processed files
    newData[:] = [newFile for newFile in newData
                  if newFile['p5_id'] not in knownStreamers]

    logging.debug("StorageManagerAPI: found %d new files", len(newData))

    newRuns = set()
    newRunStreams = {}
    for newFile in newData:
        run = newFile['run']
        stream = newFile['stream']

        newRuns.add(newFile['run'])

        if run not in newRunStreams:
            newRunStreams[run] = set()
        if stream not in newRunStreams[run]:
            newRunStreams[run].add(stream)

    logging.debug("StorageManagerAPI: found %d new runs", len(newRuns))

    cmsswVersions = set()
    streams = set()
    bindRunHltKey = []
    bindRunStreamCMSSW = []
    for run in sorted(list(newRuns)):
        (hltkey, cmssw) = getRunInfoDAO.execute(run=run, transaction=False)
        logging.debug("StorageManagerAPI: run = %d, hltkey = %s, cmssw = %s", run, hltkey, cmssw)
        if hltkey and cmssw:
            cmssw = '_'.join(cmssw.split('_')[0:4])  # only consider base release
            cmsswVersions.add(cmssw)
            bindRunHltKey.append({'RUN': run, 'HLTKEY': hltkey})
            for stream in newRunStreams[run]:
                streams.add(stream)
                bindRunStreamCMSSW.append({'RUN': run, 'STREAM': stream, 'VERSION': cmssw})
        else:
            # can't retrieve hltkey and cmssw for run, ignore any data for it
            newRuns.remove(run)

    if len(bindRunHltKey) > 0:
        insertRunDAO.execute(binds=bindRunHltKey, transaction=False)

    bindStream = []
    for stream in streams:
        bindStream.append({'STREAM': stream})
    if len(bindStream) > 0:
        insertStreamDAO.execute(binds=bindStream, transaction=False)

    bindCMSSW = []
    for cmssw in cmsswVersions:
        bindCMSSW.append({'VERSION': cmssw})
    if len(bindCMSSW) > 0:
        insertCMSSWVersionDAO.execute(binds=bindCMSSW, transaction=False)

    if len(bindRunStreamCMSSW) > 0:
        insertStreamCMSSWVersionDAO.execute(binds=bindRunStreamCMSSW, transaction=False)

    lumis = set()
    bindStreamer = []
    bindInsertFileStatus = []
    for newFile in newData:

        run = newFile['run']
        if run not in newRuns:
            continue

        lumi = newFile['lumi']
        lumis.add((run, lumi))

        if newFile['filename'] == 'run289461_ls0020_streamExpressCosmics_StorageManager.dat':
            newFile['path'] = '/store/t0streamer/Data/ExpressCosmics/000/289/461'

        bindStreamer.append({'LFN': newFile['path'] + '/' + newFile['filename'],
                             'P5_ID': newFile['p5_id'],
                             'RUN': run,
                             'LUMI': lumi,
                             'STREAM': newFile['stream'],
                             'FILESIZE': newFile['filesize'],
                             'EVENTS': newFile['events'],
                             'TIME': int(time.time())})

        if dbInterfaceSMNotify:
            bindInsertFileStatus.append({'P5_ID': newFile['p5_id'],
                                         'FILENAME': newFile['filename']})

    bindLumi = []
    for lumi in lumis:
        bindLumi.append({'RUN': lumi[0], 'LUMI': lumi[1]})
    if len(bindLumi) > 0:
        insertLumiDAO.execute(binds=bindLumi, transaction=False)

    if len(bindStreamer) > 0:
        insertStreamerDAO.execute(streamerPNN, binds=bindStreamer, transaction=False)

    if len(bindInsertFileStatus) > 0:
        insertFileStatusDAO.execute(bindInsertFileStatus, transaction=False)

    for x in bindStreamer:
        knownStreamers.add(x['P5_ID'])

    return
def __call__(self, filesetToProcess):
    """
    The algorithm itself
    """
    global LOCK

    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv('DIALECT'),
                                  os.getenv("DBSOCK"))

    myThread = threading.currentThread()

    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationNew = daofactory(classname="Locations.New")
    getFileLoc = daofactory(classname="Files.GetLocation")

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % (filesetToProcess.name).split(":")[0])

    startRun = (filesetToProcess.name).split(":")[3]
    fileType = (filesetToProcess.name).split(":")[2]

    # url builder
    primaryDataset = ((filesetToProcess.name).split(":")[0]).split('/')[1]
    processedDataset = ((filesetToProcess.name).split(":")[0]).split('/')[2]
    dataTier = (((filesetToProcess.name).split(":")[0]).split('/')[3]).split('-')[0]

    # First call to T0 db for this fileset
    # Here add test for the closed fileset
    LASTIME = filesetToProcess.lastUpdate

    url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
          (fileType, LASTIME, primaryDataset, processedDataset, dataTier)

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get(url + "/" + "?return_type=text/json%2Bdas")
            newFilesList = requestResult[0]["results"]
        except:
            logging.debug("T0Reader call error...")
            if tries == self.maxRetries:
                return
            else:
                tries += 1
                continue

        logging.debug("T0ASTRun queries done ...")
        now = time.time()
        filesetToProcess.last_update = now
        LASTIME = int(newFilesList['end_time']) + 1
        break

    # process all files
    if len(newFilesList['files']):
        LOCK.acquire()

        try:
            locationNew.execute(siteName="caf.cern.ch", seName="caf.cern.ch")
        except Exception as e:
            logging.debug("Error when adding new location...")
            logging.debug(e)
            logging.debug(format_exc())

        for files in newFilesList['files']:

            # Assume parents aren't asked
            newfile = File(str(files['lfn']), size=files['file_size'], events=files['events'])

            try:
                if newfile.exists() == False:
                    newfile.create()
                else:
                    newfile.loadData()

                # Add run test if already exist
                for run in files['runs']:

                    if startRun != 'None' and int(startRun) <= int(run):

                        # ToDo: Distinguish between
                        # filesetA-RunX and filesetA-Run[0-9]*
                        filesetRun = Fileset(name=(((filesetToProcess.name).split(':')[0]).split('/')[0] + '/' +
                                                   ((filesetToProcess.name).split(':')[0]).split('/')[1] + '/' +
                                                   ((filesetToProcess.name).split(':')[0]).split('/')[2] + '/' +
                                                   (((filesetToProcess.name).split(':')[0]).split('/')[3]).split('-')[0] +
                                                   '-' + 'Run' + str(run) + ":" +
                                                   ":".join((filesetToProcess.name).split(':')[1:])))

                        if filesetRun.exists() == False:
                            filesetRun.create()
                        else:
                            filesetRun.loadData()

                        # Add test runs already there
                        # (for growing dataset) -
                        # to support file with different runs and lumi
                        if not newfile['runs']:
                            runSet = set()
                            runSet.add(Run(run, *files['runs'][run]))
                            newfile.addRunSet(runSet)

                        fileLoc = getFileLoc.execute(file=files['lfn'])
                        if 'caf.cern.ch' not in fileLoc:
                            newfile.setLocation("caf.cern.ch")

                        filesetRun.addFile(newfile)
                        logging.debug("new file created/loaded added by T0ASTRun...")
                        filesetRun.commit()

            except Exception as e:
                logging.debug("Error when adding new files in T0ASTRun...")
                logging.debug(e)
                logging.debug(format_exc())

            filesetToProcess.setLastUpdate(int(newFilesList['end_time']) + 1)
            filesetToProcess.commit()

        LOCK.release()

    else:
        logging.debug("nothing to do...")
        # For re-opened or empty filesets, try until the purge time
        if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
            filesetToProcess.setLastUpdate(time.time())
            filesetToProcess.commit()

    if LASTIME:

        myRequester = JSONRequests(url="vocms52.cern.ch:8889")
        requestResult = myRequester.get("/tier0/runs")

        for listRun in requestResult[0]:

            if int(startRun) <= int(listRun['run']):

                if listRun['status'] == 'CloseOutExport' or \
                   listRun['status'] == 'Complete' or \
                   listRun['status'] == 'CloseOutT1Skimming':

                    closeFileset = Fileset(name=(((filesetToProcess.name).split(':')[0]).split('/')[0] + '/' +
                                                 ((filesetToProcess.name).split(':')[0]).split('/')[1] + '/' +
                                                 ((filesetToProcess.name).split(':')[0]).split('/')[2] + '/' +
                                                 (((filesetToProcess.name).split(':')[0]).split('/')[3]).split('-')[0] +
                                                 '-' + 'Run' + str(listRun['run']) + ":" +
                                                 ":".join((filesetToProcess.name).split(':')[1:])))

                    if closeFileset.exists() != False:
                        closeFileset = Fileset(id=closeFileset.exists())
                        closeFileset.loadData()
                        if closeFileset.open == True:
                            closeFileset.markOpen(False)

    # Commit the fileset
    filesetToProcess.commit()

    # Check whether the fileset should be purged
    logging.debug("Test purge in T0ASTRun ...")
    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")

    filesetToProcess.commit()
def testPublishJSONCreate(self):
    """
    Re-run testA_BasicFunctionTest with data in DBSBuffer
    Make sure files are generated
    """
    # Set up uploading and write them elsewhere since the test deletes them.
    self.uploadPublishInfo = True
    self.uploadPublishDir = self.testDir

    # Insert some DBSFiles
    testFileChildA = DBSBufferFile(lfn="/this/is/a/child/lfnA", size=1024, events=20)
    testFileChildA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                                psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileChildB = DBSBufferFile(lfn="/this/is/a/child/lfnB", size=1024, events=20)
    testFileChildB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                                psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileChildC = DBSBufferFile(lfn="/this/is/a/child/lfnC", size=1024, events=20)
    testFileChildC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                                psetHash="GIBBERISH", configContent="MOREGIBBERISH")

    testFileChildA.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
    testFileChildB.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
    testFileChildC.setDatasetPath("/Cosmics/USER-DATASET2-v1/USER")

    testFileChildA.create()
    testFileChildB.create()
    testFileChildC.create()

    testFile = DBSBufferFile(lfn="/this/is/a/lfn", size=1024, events=10)
    testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                          psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
    testFile.create()

    testFileChildA.addParents([testFile["lfn"]])
    testFileChildB.addParents([testFile["lfn"]])
    testFileChildC.addParents([testFile["lfn"]])

    myThread = threading.currentThread()
    self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger, dbinterface=myThread.dbi)
    self.insertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")
    workflowID = self.insertWorkflow.execute(requestName='TestWorkload',
                                             taskPath='TestWorkload/Analysis',
                                             blockMaxCloseTime=100, blockMaxFiles=100,
                                             blockMaxEvents=100, blockMaxSize=100)
    myThread.dbi.processData("update dbsbuffer_file set workflow=1 where id < 4")

    # Run the test again
    self.testA_BasicFunctionTest()

    # Reset default values
    self.uploadPublishInfo = False
    self.uploadPublishDir = None

    # Make sure the files are there
    self.assertTrue(os.path.exists(os.path.join(self.testDir, 'TestWorkload_publish.json')))
    self.assertTrue(os.path.getsize(os.path.join(self.testDir, 'TestWorkload_publish.json')) > 100)
    self.assertTrue(os.path.exists(os.path.join(self.testDir, 'TestWorkload_publish.tgz')))

    return
def uploadConditions(username, password, serviceProxy):
    """
    _uploadConditions_

    Called by Tier0Feeder in every polling cycle

    Determine PCL status incl. files for upload for all
    run/stream combos that are not finished yet.

    Loop through the runs, uploading files for all streams.
    If the run/stream upload subscription is finished, mark
    that run/stream PCL as finished.

    Terminate the loop on the first run that has not completely
    finished streams, but only within a certain timeout based
    on the runs end time (either from the EoR record or based
    on the insertion time of the last streamer file).
    """
    logging.debug("uploadConditions()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi)

    getConditionsDAO = daoFactory(classname="ConditionUpload.GetConditions")
    completeFilesDAO = daoFactory(classname="ConditionUpload.CompleteFiles")
    finishPCLforEmptyExpressDAO = daoFactory(classname="ConditionUpload.FinishPCLforEmptyExpress")
    isPromptCalibrationFinishedDAO = daoFactory(classname="ConditionUpload.IsPromptCalibrationFinished")
    markPromptCalibrationFinishedDAO = daoFactory(classname="ConditionUpload.MarkPromptCalibrationFinished")

    # look at all runs which are finished with conditions uploads
    # check for late arriving payloads and upload them
    conditions = getConditionsDAO.execute(finished=True, transaction=False)

    for (index, run) in enumerate(sorted(conditions.keys()), 1):

        dropboxHost = conditions[run]['dropboxHost']
        validationMode = conditions[run]['validationMode']

        for streamid, uploadableFiles in conditions[run]['streams'].items():

            if len(uploadableFiles) > 0:

                uploadedFiles = uploadToDropbox(uploadableFiles, dropboxHost, validationMode,
                                                username, password, serviceProxy)

                if len(uploadedFiles) > 0:

                    bindVarList = []
                    for uploadedFile in uploadedFiles:
                        bindVarList.append({'FILEID': uploadedFile['fileid'],
                                            'SUBSCRIPTION': uploadedFile['subscription']})

                    # need a transaction here so we don't have files in
                    # state acquired and complete at the same time
                    try:
                        myThread.transaction.begin()
                        completeFilesDAO.execute(bindVarList, conn=myThread.transaction.conn,
                                                 transaction=True)
                    except:
                        myThread.transaction.rollback()
                        raise
                    else:
                        myThread.transaction.commit()

    # check for pathological runs with no express data that will never
    # create conditions for upload and set them to finished
    finishPCLforEmptyExpressDAO.execute(transaction=False)

    # look at all runs not completely finished with condition uploads
    # return acquired (to be uploaded) files for them
    conditions = getConditionsDAO.execute(finished=False, transaction=False)

    for (index, run) in enumerate(sorted(conditions.keys()), 1):

        advanceToNextRun = True

        timeout = conditions[run]['condUploadTimeout']
        dropboxHost = conditions[run]['dropboxHost']
        validationMode = conditions[run]['validationMode']

        for streamid, uploadableFiles in conditions[run]['streams'].items():

            if len(uploadableFiles) > 0:

                uploadedFiles = uploadToDropbox(uploadableFiles, dropboxHost, validationMode,
                                                username, password, serviceProxy)

                if len(uploadedFiles) > 0:

                    bindVarList = []
                    for uploadedFile in uploadedFiles:
                        bindVarList.append({'FILEID': uploadedFile['fileid'],
                                            'SUBSCRIPTION': uploadedFile['subscription']})

                    # need a transaction here so we don't have files in
                    # state acquired and complete at the same time
                    try:
                        myThread.transaction.begin()
                        completeFilesDAO.execute(bindVarList, conn=myThread.transaction.conn,
                                                 transaction=True)
                    except:
                        myThread.transaction.rollback()
                        raise
                    else:
                        myThread.transaction.commit()

                    # check if all files for run/stream uploaded (that means only complete
                    # files for same number of subscriptions as number of producers)
                    markPromptCalibrationFinishedDAO.execute(run, streamid, transaction=False)

                else:
                    # upload failed
                    advanceToNextRun = False

            else:
                # no files available for upload yet
                advanceToNextRun = False

        # check if all streams for run finished
        if advanceToNextRun:
            finished = isPromptCalibrationFinishedDAO.execute(run, transaction=False)
            if not finished:
                advanceToNextRun = False

        # check for timeout, but only if there is a next run
        if not advanceToNextRun and index < len(conditions.keys()):
            getRunStopTimeDAO = daoFactory(classname="ConditionUpload.GetRunStopTime")
            stopTime = getRunStopTimeDAO.execute(run, transaction=False)
            if time.time() < stopTime + timeout:
                break

    return
def stuffDatabase(self):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database.

    We'll inject files with the location set both as an SE name and as a
    PhEDEx node name.
    """
    myThread = threading.currentThread()
    buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
    insertWorkflow = buffer3Factory(classname="InsertWorkflow")
    insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["srm-cms.cern.ch"]))
    testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    datasetAction = uploadFactory(classname="NewDataset")
    createAction = uploadFactory(classname="CreateBlocks")

    datasetAction.execute(datasetPath=self.testDatasetA)
    datasetAction.execute(datasetPath=self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBufferBlock(name=self.blockAName, location="srm-cms.cern.ch", datasetpath=None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBufferBlock(name=self.blockBName, location="srm-cms.cern.ch", datasetpath=None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks=[newBlockA, newBlockB])

    bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)

    setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
    fileStatus.execute(testFileA["lfn"], "LOCAL")
    fileStatus.execute(testFileB["lfn"], "LOCAL")
    fileStatus.execute(testFileC["lfn"], "LOCAL")
    fileStatus.execute(testFileD["lfn"], "LOCAL")
    fileStatus.execute(testFileE["lfn"], "LOCAL")

    associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
    associateWorkflow.execute(testFileA["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequest", "BogusTask")

    return
def stuffDatabase(self):
    """
    Fill the dbsbuffer tables with some files and blocks.  We'll insert a
    total of 5 files spanning two blocks.  There will be a total of two
    datasets inserted into the database.

    We'll inject files with the location set both as an SE name and as a
    PhEDEx node name.
    """
    myThread = threading.currentThread()

    # Create the DAOs factory and the relevant instances
    buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
    setBlock = buffer3Factory(classname="DBSBufferFiles.SetBlock")
    fileStatus = buffer3Factory(classname="DBSBufferFiles.SetStatus")
    associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
    insertWorkflow = buffer3Factory(classname="InsertWorkflow")
    datasetAction = buffer3Factory(classname="NewDataset")
    createAction = buffer3Factory(classname="CreateBlocks")

    # Create workflow in the database
    insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

    # First file on first block
    checksums = {"adler32": "1234", "cksum": "5678"}
    testFileA = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["T2_CH_CERN"]))
    testFileA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileA.setDatasetPath(self.testDatasetA)
    testFileA.addRun(Run(2, *[45]))
    testFileA.create()

    # Second file on first block
    testFileB = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["T2_CH_CERN"]))
    testFileB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileB.setDatasetPath(self.testDatasetA)
    testFileB.addRun(Run(2, *[45]))
    testFileB.create()

    # Third file on first block
    testFileC = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["T2_CH_CERN"]))
    testFileC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileC.setDatasetPath(self.testDatasetA)
    testFileC.addRun(Run(2, *[45]))
    testFileC.create()

    self.testFilesA.append(testFileA)
    self.testFilesA.append(testFileB)
    self.testFilesA.append(testFileC)

    # First file on second block
    testFileD = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["T1_US_FNAL_Disk"]))
    testFileD.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileD.setDatasetPath(self.testDatasetB)
    testFileD.addRun(Run(2, *[45]))
    testFileD.create()

    # Second file on second block
    testFileE = DBSBufferFile(lfn=makeUUID(), size=1024, events=10, checksums=checksums,
                              locations=set(["T1_US_FNAL_Disk"]))
    testFileE.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO",
                           psetHash="GIBBERISH", configContent="MOREGIBBERISH")
    testFileE.setDatasetPath(self.testDatasetB)
    testFileE.addRun(Run(2, *[45]))
    testFileE.create()

    self.testFilesB.append(testFileD)
    self.testFilesB.append(testFileE)

    # insert datasets in the dbsbuffer table
    datasetAction.execute(datasetPath=self.testDatasetA)
    datasetAction.execute(datasetPath=self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    # create and insert blocks into dbsbuffer table
    newBlockA = DBSBufferBlock(name=self.blockAName, location="T2_CH_CERN", datasetpath=None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBufferBlock(name=self.blockBName, location="T1_US_FNAL_Disk", datasetpath=None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks=[newBlockA, newBlockB])

    # associate files to their corresponding block
    setBlock.execute(testFileA["lfn"], self.blockAName)
    setBlock.execute(testFileB["lfn"], self.blockAName)
    setBlock.execute(testFileC["lfn"], self.blockAName)
    setBlock.execute(testFileD["lfn"], self.blockBName)
    setBlock.execute(testFileE["lfn"], self.blockBName)

    # set file status to LOCAL
    fileStatus.execute(testFileA["lfn"], "LOCAL")
    fileStatus.execute(testFileB["lfn"], "LOCAL")
    fileStatus.execute(testFileC["lfn"], "LOCAL")
    fileStatus.execute(testFileD["lfn"], "LOCAL")
    fileStatus.execute(testFileE["lfn"], "LOCAL")

    # associate files to a given workflow
    associateWorkflow.execute(testFileA["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileB["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileC["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileD["lfn"], "BogusRequest", "BogusTask")
    associateWorkflow.execute(testFileE["lfn"], "BogusRequest", "BogusTask")

    return
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName="site1", pnn="T2_CH_CERN")
    locationAction.execute(siteName="site2", pnn="T1_US_FNAL_Disk")

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"]))
        newFile.create()
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100, locations=set(["T2_CH_CERN"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("T2_CH_CERN")
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation(["T2_CH_CERN", "T1_US_FNAL_Disk"])
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
    testWorkflow.create()

    self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="SizeBased",
                                                 type="Processing")
    self.multipleFileSubscription.create()

    self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                               workflow=testWorkflow,
                                               split_algo="SizeBased",
                                               type="Processing")
    self.singleFileSubscription.create()

    self.multipleSiteSubscription = Subscription(fileset=self.multipleSiteFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="SizeBased",
                                                 type="Processing")
    self.multipleSiteSubscription.create()
    return
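A brief illustrative sketch (not from the original suite) of how these subscriptions are typically fed to the job splitter; it assumes SplitterFactory from WMCore.JobSplitting.SplitterFactory is imported and that the SizeBased splitter accepts a size_per_job argument.

def testSizeBasedSplitSketch(self):
    """
    Hedged sketch: run the SizeBased splitter over the multiple-file
    subscription created in setUp (size_per_job value is illustrative).
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleFileSubscription)
    jobGroups = jobFactory(size_per_job=2000)
    # each input file is 1000 bytes, so jobs should hold at most two files
    for jobGroup in jobGroups:
        for job in jobGroup.jobs:
            self.assertTrue(len(job.getFiles()) <= 2)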
def __init__(self, config): """ Initialise class members """ logging.info("Running __init__ for DBS3 Uploader") BaseWorkerThread.__init__(self) self.config = config # This is slightly dangerous, but DBSUpload depends # on DBSInterface anyway self.dbsUrl = self.config.DBS3Upload.dbsUrl # Tier0 Agent don't need this if hasattr(self.config, "Tier0Feeder"): self.wmstatsServerSvc = None else: wmstatsSvcURL = self.config.General.centralWMStatsURL.replace("couchdb/wmstats", "wmstatsserver") self.wmstatsServerSvc = WMStatsServer(wmstatsSvcURL) self.dbsUtil = DBSBufferUtil() myThread = threading.currentThread() daoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.updateBlocksDAO = daoFactory(classname="UpdateBlocks") self.updateFilesDAO = daoFactory(classname="UpdateFiles") self.createBlocksDAO = daoFactory(classname="CreateBlocks") self.setBlockFilesDAO = daoFactory(classname="SetBlockFiles") self.pool = [] self.blocksToCheck = [] self.workInput = None self.workResult = None self.nProc = getattr(self.config.DBS3Upload, 'nProcesses', 4) self.wait = getattr(self.config.DBS3Upload, 'dbsWaitTime', 2) self.nTries = getattr(self.config.DBS3Upload, 'dbsNTries', 300) self.physicsGroup = getattr(self.config.DBS3Upload, "physicsGroup", "NoGroup") self.datasetType = getattr(self.config.DBS3Upload, "datasetType", "PRODUCTION") self.primaryDatasetType = getattr(self.config.DBS3Upload, "primaryDatasetType", "mc") self.blockCount = 0 self.dbsApi = DbsApi(url=self.dbsUrl) # List of blocks currently in processing self.queuedBlocks = [] # Set up the pool of worker processes self.setupPool() # Setting up any cache objects self.blockCache = {} self.filesToUpdate = [] self.produceCopy = getattr(self.config.DBS3Upload, 'dumpBlock', False) self.copyPath = os.path.join(getattr(self.config.DBS3Upload, 'componentDir', '/data/srv/'), 'dbsuploader_block.json') self.timeoutWaiver = 1 self.datasetParentageCache = {} return
def addParents(self, parentLFNs):
    """
    _addParents_

    Associate this file with its parents.  If the parents do not exist in
    the buffer then bogus placeholder files will be created so that the
    parentage information can be tracked and correctly inserted into DBS.
    """
    newAlgoAction = self.daoFactory(classname="NewAlgo")
    newDatasetAction = self.daoFactory(classname="NewDataset")
    assocAction = self.daoFactory(classname="AlgoDatasetAssoc")
    existsAction = self.daoFactory(classname="DBSBufferFiles.Exists")

    uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=self.logger,
                               dbinterface=self.dbi)
    setDatasetAlgoAction = uploadFactory(classname="SetDatasetAlgo")

    existingTransaction = self.beginTransaction()

    toBeCreated = []
    for parentLFN in parentLFNs:
        self["parents"].add(DBSBufferFile(lfn=parentLFN))

        if not existsAction.execute(lfn=parentLFN,
                                    conn=self.getDBConn(),
                                    transaction=True):
            toBeCreated.append(parentLFN)

    if len(toBeCreated) > 0:
        newAlgoAction.execute(appName="cmsRun", appVer="UNKNOWN", appFam="UNKNOWN",
                              psetHash="NOT_SET", configContent="NOT_SET",
                              conn=self.getDBConn(), transaction=True)

        newDatasetAction.execute(datasetPath="bogus",
                                 conn=self.getDBConn(), transaction=True)

        assocID = assocAction.execute(appName="cmsRun", appVer="UNKNOWN", appFam="UNKNOWN",
                                      psetHash="NOT_SET", datasetPath="bogus",
                                      conn=self.getDBConn(), transaction=True)

        setDatasetAlgoAction.execute(datasetAlgo=assocID, inDBS=1,
                                     conn=self.getDBConn(), transaction=True)

        action = self.daoFactory(classname="DBSBufferFiles.AddIgnore")
        action.execute(lfns=toBeCreated, datasetAlgo=assocID, status="GLOBAL",
                       conn=self.getDBConn(), transaction=True)

    action = self.daoFactory(classname="DBSBufferFiles.HeritageLFNParent")
    action.execute(parentLFNs=parentLFNs, childLFN=self["lfn"],
                   conn=self.getDBConn(),
                   transaction=self.existingTransaction())

    self.commitTransaction(existingTransaction)
    return
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.
    """
    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id=liveJob["id"])
                liveWMBSJob.update(liveJob)
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs=killableJobs)
        except BossAirException as ex:
            # Something's gone wrong.  Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master.
            # The batch system will have to take care of itself.
            pass

    liveWMBSJobs = defaultdict(list)
    for liveJob in liveJobs:
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        liveWMBSJobs[liveJob["state"]].append(liveWMBSJob)

    for state, jobsByState in liveWMBSJobs.items():
        if len(jobsByState) > 100 and state != "executing":
            # if there are too many jobs, skip the couch and dashboard updates
            # TODO: couch and dashboard need to be updated in bulk or in parallel.
            changeState.check("killed", state)
            changeState.persist(jobsByState, "killed", state)
        else:
            changeState.propagate(jobsByState, "killed", state)
    return
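A hedged usage sketch (not part of the original module): it assumes `agentConfig` is the already-loaded WMAgent configuration object providing the JobStateMachine and BossAir sections, and the workflow name is a placeholder.

# Hedged example: kill a stuck workflow from inside an agent component,
# reusing the component's configuration for both couch and BossAir access.
killWorkflow("SomeStuckWorkflow",
             jobCouchConfig=agentConfig,
             bossAirConfig=agentConfig)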
def algorithm(self, *args, **kwargs): """ _algorithm_ Run the discovery query and generate jobs if we find enough files. """ # This doesn't use a proxy self.grabByProxy = False filesPerJob = int(kwargs.get("files_per_job", 10)) myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) fileAvail = daoFactory( classname="Subscriptions.SiblingSubscriptionsComplete") completeFiles = fileAvail.execute(self.subscription["id"], conn=myThread.transaction.conn, transaction=True) self.subscription["fileset"].load() if self.subscription["fileset"].open == True: filesetClosed = False else: fileFailed = daoFactory( classname="Subscriptions.SiblingSubscriptionsFailed") fileFailed.execute(self.subscription["id"], self.subscription["fileset"].id, conn=myThread.transaction.conn, transaction=True) filesetClosed = True fileSites = {} foundFiles = [] for completeFile in completeFiles: if completeFile["lfn"] not in foundFiles: foundFiles.append(completeFile["lfn"]) else: continue if completeFile["se_name"] not in fileSites: fileSites[completeFile["se_name"]] = [] fileSites[completeFile["se_name"]].append(completeFile) for siteName in fileSites.keys(): if len(fileSites[siteName]) < filesPerJob and not filesetClosed: continue self.newGroup() while len(fileSites[siteName]) >= filesPerJob: self.newJob(name=makeUUID()) for jobFile in fileSites[siteName][0:filesPerJob]: newFile = File(id=jobFile["id"], lfn=jobFile["lfn"], events=jobFile["events"]) newFile["locations"] = set([jobFile["se_name"]]) self.currentJob.addFile(newFile) fileSites[siteName] = fileSites[siteName][filesPerJob:] if filesetClosed and len(fileSites[siteName]) > 0: self.newJob(name=makeUUID()) for jobFile in fileSites[siteName]: newFile = File(id=jobFile["id"], lfn=jobFile["lfn"], events=jobFile["events"]) newFile["locations"] = set([jobFile["se_name"]]) self.currentJob.addFile(newFile) return
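# Hedged sketch of the per-site chunking done by the algorithm above: files are
# grouped by location, full chunks of files_per_job become jobs, and a final
# partial chunk is only flushed once the input fileset is closed. Plain lists
# stand in for WMBS files and jobs; all names here are illustrative only.

def split_sibling_files_sketch(files_by_site, files_per_job, fileset_closed):
    jobs = []
    for site, files in files_by_site.items():
        if len(files) < files_per_job and not fileset_closed:
            continue  # not enough files yet, wait for more
        while len(files) >= files_per_job:
            jobs.append((site, files[:files_per_job]))
            files = files[files_per_job:]
        if fileset_closed and files:
            jobs.append((site, files))  # leftover chunk once no more data can arrive
    return jobs

files_by_site = {"T1_US_FNAL_Disk": ["f%d" % i for i in range(23)],
                 "T2_CH_CERN": ["g0", "g1"]}
print(split_sibling_files_sketch(files_by_site, 10, fileset_closed=False))  # 2 jobs
print(split_sibling_files_sketch(files_by_site, 10, fileset_closed=True))   # 4 jobs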
def algorithm(self, groupInstance=None, jobInstance=None, *args, **kwargs): """ _algorithm_ A file-based splitting algorithm """ # extract some global scheduling parameters self.jobNamePrefix = kwargs.get('jobNamePrefix', "RepackMerge") self.minInputSize = kwargs['minInputSize'] self.maxInputSize = kwargs['maxInputSize'] self.maxInputEvents = kwargs['maxInputEvents'] self.maxInputFiles = kwargs['maxInputFiles'] self.maxEdmSize = kwargs['maxEdmSize'] self.maxOverSize = kwargs['maxOverSize'] # catch configuration errors if self.maxOverSize > self.maxEdmSize: self.maxOverSize = self.maxEdmSize self.createdGroup = False myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) maxLumiWithJobDAO = daoFactory( classname="Subscriptions.MaxLumiWithJob") getGoodLumiHolesDAO = daoFactory( classname="JobSplitting.GetGoodLumiHoles") # highest lumi with a job maxLumiWithJob = maxLumiWithJobDAO.execute(self.subscription["id"]) logging.debug("DEBUG Sub %d, maxLumiWithJob = %d" % (self.subscription["id"], maxLumiWithJob)) # find good lumi holes (needs to be done before data discovery) goodLumiHoles = getGoodLumiHolesDAO.execute(self.subscription["id"], maxLumiWithJob) logging.debug("DEBUG Sub %d, goodLumiHoles = %s" % (self.subscription["id"], sorted(goodLumiHoles))) # data discovery getFilesDAO = daoFactory( classname="Subscriptions.GetAvailableRepackMergeFiles") availableFiles = getFilesDAO.execute(self.subscription["id"]) # nothing to do, stop immediately if len(availableFiles) == 0: return # lumis we have data for lumiList = set([]) for result in availableFiles: for lumi in range(result['first_lumi'], result['last_lumi'] + 1): lumiList.add(lumi) lumiList = sorted(list(lumiList)) logging.debug("DEBUG Sub %d, lumiList = %s" % (self.subscription["id"], lumiList)) # check if fileset is closed fileset = self.subscription.getFileset() fileset.load() # extended lumi range for job creation firstLumi = maxLumiWithJob + 1 lastLumi = lumiList[-1] # consistency check (ignore at end of run) if lumiList[0] <= maxLumiWithJob: if fileset.open: logging.error( "ERROR: finding data that can't be there, bailing out...") return else: logging.info( "WARNING: finding data that can't be there, fileset is closed, merge anyway..." ) firstLumi = lumiList[0] # narrow down lumi range for job creation filesByLumi = {} for lumi in range(firstLumi, lastLumi + 1): if (lumi in lumiList) or (lumi in goodLumiHoles): filesByLumi[lumi] = [] else: break # figure out what data to create jobs for for fileInfo in availableFiles: lumi = fileInfo['first_lumi'] if lumi in filesByLumi: filesByLumi[lumi].append(fileInfo) logging.debug("DEBUG Sub %d, create jobs for lumis = %s" % (self.subscription["id"], sorted(filesByLumi.keys()))) self.defineJobs(filesByLumi, fileset.open) return
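# Hedged sketch of the lumi-range narrowing in the algorithm above: starting at
# the first lumi without a job, keep walking while each lumi either has data or
# is a known-good hole, and stop at the first lumi that is neither. Only the
# selection logic is reproduced; the DAOs and WMBS objects are left out.

def narrow_lumi_range_sketch(first_lumi, last_lumi, lumis_with_data, good_holes):
    selected = []
    for lumi in range(first_lumi, last_lumi + 1):
        if lumi in lumis_with_data or lumi in good_holes:
            selected.append(lumi)
        else:
            break  # unexplained gap: do not create jobs past this point yet
    return selected

# data for lumis 5-7 and 9-10, lumi 8 is a confirmed-empty ("good") hole
print(narrow_lumi_range_sketch(5, 10, {5, 6, 7, 9, 10}, {8}))   # [5, 6, 7, 8, 9, 10]
# the same data, but lumi 8 is not yet explained, so the range stops at 7
print(narrow_lumi_range_sketch(5, 10, {5, 6, 7, 9, 10}, set()))  # [5, 6, 7]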
def __init__(self, config): BasePlugin.__init__(self, config) self.locationDict = {} myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.locationAction = daoFactory(classname="Locations.GetSiteInfo") self.packageDir = None # if agent is running in a container, Unpacker.py must come from a directory # on the host so the condor schedd can see it # config.General.workDir should always be bind mounted to the container if getattr(config.Agent, "isDocker", False): unpackerPath = os.path.join(config.General.workDir + "/Docker/WMRuntime/Unpacker.py") else: unpackerPath = os.path.join( getWMBASE(), 'src/python/WMCore/WMRuntime/Unpacker.py') if os.path.exists(unpackerPath): self.unpacker = unpackerPath else: self.unpacker = os.path.join(getWMBASE(), 'WMCore/WMRuntime/Unpacker.py') self.agent = getattr(config.Agent, 'agentName', 'WMAgent') self.sandbox = None self.scriptFile = config.JobSubmitter.submitScript self.defaultTaskPriority = getattr(config.BossAir, 'defaultTaskPriority', 0) self.maxTaskPriority = getattr(config.BossAir, 'maxTaskPriority', 1e7) self.jobsPerSubmit = getattr(config.JobSubmitter, 'jobsPerSubmit', 200) self.extraMem = getattr(config.JobSubmitter, 'extraMemoryPerCore', 500) # Required for global pool accounting self.acctGroup = getattr(config.BossAir, 'acctGroup', "production") self.acctGroupUser = getattr(config.BossAir, 'acctGroupUser', "cmsdataops") # Build a requirement string. All CMS resources match DESIRED_Sites on the START # expression side; however, there are currently some resources (T2_CH_CERN_HLT) # that are missing the REQUIRED_OS logic. Hence, we duplicate it here. # TODO(bbockelm): Remove reqStr once HLT has upgraded. self.reqStr = ( '((REQUIRED_OS=?="any") || ' '(GLIDEIN_REQUIRED_OS =?= "any") || ' 'stringListMember(GLIDEIN_REQUIRED_OS, REQUIRED_OS)) && ' '(AuthenticatedIdentity =!= "*****@*****.**")') if hasattr(config.BossAir, 'condorRequirementsString'): self.reqStr = config.BossAir.condorRequirementsString # x509 proxy handling proxy = Proxy({'logger': myThread.logger}) self.x509userproxy = proxy.getProxyFilename() # These are added now by the condor client #self.x509userproxysubject = proxy.getSubject() #self.x509userproxyfqan = proxy.getAttributeFromProxy(self.x509userproxy) return
def __init__(self, config): """ _init_ """ BaseWorkerThread.__init__(self) myThread = threading.currentThread() self.daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile self.specDirectory = config.Tier0Feeder.specDirectory self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None) self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None) self.transferSystemBaseDir = getattr(config.Tier0Feeder, "transferSystemBaseDir", None) if self.transferSystemBaseDir != None: if not os.path.exists(self.transferSystemBaseDir): self.transferSystemBaseDir = None self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None) self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None) self.localRequestCouchDB = RequestDBWriter( config.AnalyticsDataCollector.localT0RequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) hltConfConnectUrl = config.HLTConfDatabase.connectUrl dbFactoryHltConf = DBFactory(logging, dburl=hltConfConnectUrl, options={}) dbInterfaceHltConf = dbFactoryHltConf.connect() daoFactoryHltConf = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=dbInterfaceHltConf) self.getHLTConfigDAO = daoFactoryHltConf( classname="RunConfig.GetHLTConfig") storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl dbFactoryStorageManager = DBFactory(logging, dburl=storageManagerConnectUrl, options={}) self.dbInterfaceStorageManager = dbFactoryStorageManager.connect() self.getExpressReadyRunsDAO = None if hasattr(config, "PopConLogDatabase"): popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None) if popConLogConnectUrl != None: dbFactoryPopConLog = DBFactory(logging, dburl=popConLogConnectUrl, options={}) dbInterfacePopConLog = dbFactoryPopConLog.connect() daoFactoryPopConLog = DAOFactory( package="T0.WMBS", logger=logging, dbinterface=dbInterfacePopConLog) self.getExpressReadyRunsDAO = daoFactoryPopConLog( classname="Tier0Feeder.GetExpressReadyRuns") self.haveT0DataSvc = False if hasattr(config, "T0DataSvcDatabase"): t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None) if t0datasvcConnectUrl != None: self.haveT0DataSvc = True dbFactoryT0DataSvc = DBFactory(logging, dburl=t0datasvcConnectUrl, options={}) dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect() self.daoFactoryT0DataSvc = DAOFactory( package="T0.WMBS", logger=logging, dbinterface=dbInterfaceT0DataSvc) return
def __init__(self, config): """ __init__ Create all DAO objects that are used by this class. """ WMConnectionBase.__init__(self, "WMCore.WMBS") myThread = threading.currentThread() self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.getOutputMapAction = self.daofactory( classname="Jobs.GetOutputMap") self.bulkAddToFilesetAction = self.daofactory( classname="Fileset.BulkAddByLFN") self.bulkParentageAction = self.daofactory( classname="Files.AddBulkParentage") self.getJobTypeAction = self.daofactory(classname="Jobs.GetType") self.getParentInfoAction = self.daofactory( classname="Files.GetParentInfo") self.setParentageByJob = self.daofactory( classname="Files.SetParentageByJob") self.setParentageByMergeJob = self.daofactory( classname="Files.SetParentageByMergeJob") self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi") self.setFileLocation = self.daofactory( classname="Files.SetLocationByLFN") self.setFileAddChecksum = self.daofactory( classname="Files.AddChecksumByLFN") self.addFileAction = self.daofactory(classname="Files.Add") self.jobCompleteInput = self.daofactory(classname="Jobs.CompleteInput") self.setBulkOutcome = self.daofactory(classname="Jobs.SetOutcomeBulk") self.getWorkflowSpec = self.daofactory( classname="Workflow.GetSpecAndNameFromTask") self.getJobInfoByID = self.daofactory(classname="Jobs.LoadFromID") self.getFullJobInfo = self.daofactory( classname="Jobs.LoadForErrorHandler") self.getJobTaskNameAction = self.daofactory( classname="Jobs.GetFWJRTaskName") self.dbsStatusAction = self.dbsDaoFactory( classname="DBSBufferFiles.SetStatus") self.dbsParentStatusAction = self.dbsDaoFactory( classname="DBSBufferFiles.GetParentStatus") self.dbsChildrenAction = self.dbsDaoFactory( classname="DBSBufferFiles.GetChildren") self.dbsCreateFiles = self.dbsDaoFactory( classname="DBSBufferFiles.Add") self.dbsSetLocation = self.dbsDaoFactory( classname="DBSBufferFiles.SetLocationByLFN") self.dbsInsertLocation = self.dbsDaoFactory( classname="DBSBufferFiles.AddLocation") self.dbsSetChecksum = self.dbsDaoFactory( classname="DBSBufferFiles.AddChecksumByLFN") self.dbsSetRunLumi = self.dbsDaoFactory( classname="DBSBufferFiles.AddRunLumi") self.dbsGetWorkflow = self.dbsDaoFactory(classname="ListWorkflow") self.dbsLFNHeritage = self.dbsDaoFactory( classname="DBSBufferFiles.BulkHeritageParent") self.stateChanger = ChangeState(config) # Decide whether or not to attach jobReport to returned value self.returnJobReport = getattr(config.JobAccountant, 'returnReportFromWorker', False) # Store location for the specs for DBS self.specDir = getattr(config.JobAccountant, 'specDir', None) # ACDC service self.dataCollection = DataCollectionService( url=config.ACDC.couchurl, database=config.ACDC.database) jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url'] jobDBName = config.JobStateMachine.couchDBName jobCouchdb = CouchServer(jobDBurl) self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName) self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL, "WMStatsAgent") # Hold data for later committal self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.wmbsMergeFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] self.parentageBinds = [] self.parentageBindsForMerge = [] self.jobsWithSkippedFiles = {} self.count = 0 self.datasetAlgoID = collections.deque(maxlen=1000) self.datasetAlgoPaths = collections.deque(maxlen=1000) self.dbsLocations = set() self.workflowIDs = collections.deque(maxlen=1000) self.workflowPaths = collections.deque(maxlen=1000) self.phedex = PhEDEx() self.locLists = self.phedex.getNodeMap() return
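# Hedged sketch of the bounded caches created in the __init__ above
# (collections.deque(maxlen=1000)): remember the most recently seen
# dataset/algo or workflow paths so repeated inserts can be skipped, while
# memory use stays bounded because old entries fall off the end. The class
# and names below are illustrative only, not part of the WMCore API.
import collections

class BoundedSeenCache(object):
    def __init__(self, maxlen=1000):
        self._recent = collections.deque(maxlen=maxlen)

    def seen(self, key):
        """Return True if key was seen recently, otherwise remember it."""
        if key in self._recent:
            return True
        self._recent.append(key)
        return False

cache = BoundedSeenCache(maxlen=3)
for path in ["/A/B/RAW", "/A/B/RAW", "/C/D/RECO", "/E/F/AOD", "/G/H/RAW", "/A/B/RAW"]:
    print(path, cache.seen(path))
# the second "/A/B/RAW" is a hit; the last one is a miss because the
# three-entry cache has since evicted it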
def setUp(self): """ _setUp_ """ import WMQuality.TestInit WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious") self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema( customModules=["WMComponent.DBS3Buffer", "T0.WMBS"]) self.splitterFactory = SplitterFactory(package="T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state, state_time) VALUES (1, 'SomeSite', 1, 1) """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_pnns (id, pnn) VALUES (2, 'SomePNN') """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 2) """, transaction=False) insertRunDAO = daoFactory(classname="RunConfig.InsertRun") insertRunDAO.execute(binds={ 'RUN': 1, 'HLTKEY': "someHLTKey" }, transaction=False) insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection") for lumi in [1, 2, 3, 4]: insertLumiDAO.execute(binds={ 'RUN': 1, 'LUMI': lumi }, transaction=False) insertStreamDAO = daoFactory(classname="RunConfig.InsertStream") insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False) insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "A", "TestFileset1") self.fileset1 = Fileset(name="TestFileset1") self.fileset1.load() workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset=self.fileset1, workflow=workflow1, split_algo="Repack", type="Repack") self.subscription1.create() # keep for later self.insertClosedLumiDAO = daoFactory( classname="RunLumiCloseout.InsertClosedLumi") self.currentTime = int(time.time()) # default split parameters self.splitArgs = {} self.splitArgs['maxSizeSingleLumi'] = 20 * 1024 * 1024 * 1024 self.splitArgs['maxSizeMultiLumi'] = 10 * 1024 * 1024 * 1024 self.splitArgs['maxInputEvents'] = 500000 self.splitArgs['maxInputFiles'] = 1000 self.splitArgs['maxLatency'] = 50000 return
def testGetFinishedWorkflows(self): """ _testGetFinishedWorkflows_ Test that we get only those workflows which are finished, that is, workflows where all their subscriptions are finished and all other workflows with the same spec are finished too """ owner = "no-one" #Create a bunch of workflows with "different" specs and tasks workflows = [] for i in range(0, 100): scaledIndex = i % 10 testWorkflow = Workflow(spec="sp00%i" % scaledIndex, owner=owner, name="wf00%i" % scaledIndex, task="task%i" % i) testWorkflow.create() workflows.append(testWorkflow) #Everyone will use this fileset testFileset = Fileset(name="TestFileset") testFileset.create() #Create subscriptions! subscriptions = [] for workflow in workflows: subscription = Subscription(fileset=testFileset, workflow=workflow) subscription.create() subscriptions.append(subscription) #Check that all workflows are NOT finished myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) getFinishedDAO = daoFactory(classname="Workflow.GetFinishedWorkflows") result = getFinishedDAO.execute() self.assertEqual( len(result), 0, "A workflow is incorrectly flagged as finished: %s" % str(result)) #Mark the first 50 subscriptions as finished for idx, sub in enumerate(subscriptions): if idx > 49: break sub.markFinished() #No workflow is finished yet, none of them has all of its subscriptions completed result = getFinishedDAO.execute() self.assertEqual( len(result), 0, "A workflow is incorrectly flagged as finished: %s" % str(result)) #Now finish all workflows in wf00{0-5} for idx, sub in enumerate(subscriptions): if idx < 50 or idx % 10 > 5: continue sub.markFinished() #Check the workflows result = getFinishedDAO.execute() self.assertEqual( len(result), 6, "A workflow is incorrectly flagged as finished: %s" % str(result)) #Check the overall structure of the workflows for wf in result: #Sanity checks on the results # These are very specific checks and depend heavily on the names of task, spec and workflow self.assertEqual( wf[2:], result[wf]['spec'][2:], "A workflow has the wrong spec-name combination: %s" % str(wf)) self.assertTrue( int(wf[2:]) < 6, "A workflow is incorrectly flagged as finished: %s" % str(wf)) self.assertEqual( len(result[wf]['workflows']), 10, "A workflow has more tasks than it should: %s" % str(result[wf])) for task in result[wf]['workflows']: self.assertEqual( len(result[wf]['workflows'][task]), 1, "A workflow has more subscriptions than it should: %s" % str(result[wf])) return
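# Hedged sketch of the notion the test above checks: a spec counts as finished
# only when every subscription of every workflow sharing that spec is finished.
# Plain (spec, finished) tuples stand in for WMBS workflows and subscriptions;
# the helper name is illustrative, not the DAO being tested.
from collections import defaultdict

def finished_specs_sketch(subscriptions):
    """subscriptions: iterable of (spec_name, finished_bool) pairs."""
    by_spec = defaultdict(list)
    for spec, finished in subscriptions:
        by_spec[spec].append(finished)
    return {spec for spec, flags in by_spec.items() if all(flags)}

subs = [("sp000", True), ("sp000", True), ("sp001", True), ("sp001", False)]
print(finished_specs_sketch(subs))  # {'sp000'}: sp001 still has an unfinished subscription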
def setUp(self): """ _setUp_ Setup the database and WMBS for the test. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema( customModules=["WMComponent.DBS3Buffer", "WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() self.daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.dbsfactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = self.daofactory(classname="Locations.New") locationAction.execute(siteName="site1", pnn="T1_US_FNAL_Disk") inputFile = File(lfn="/path/to/some/lfn", size=10, events=10, locations="T1_US_FNAL_Disk") inputFile.create() inputFileset = Fileset(name="InputFileset") inputFileset.create() inputFileset.addFile(inputFile) inputFileset.commit() unmergedFileset = Fileset(name="UnmergedFileset") unmergedFileset.create() mergedFileset = Fileset(name="MergedFileset") mergedFileset.create() procWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF", task="/TestWF/None") procWorkflow.create() procWorkflow.addOutput("outputRECORECO", unmergedFileset) mergeWorkflow = Workflow(spec="wf002.xml", owner="Steve", name="MergeWF", task="/MergeWF/None") mergeWorkflow.create() mergeWorkflow.addOutput("Merged", mergedFileset) insertWorkflow = self.dbsfactory(classname="InsertWorkflow") insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0) insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0) self.procSubscription = Subscription(fileset=inputFileset, workflow=procWorkflow, split_algo="FileBased", type="Processing") self.procSubscription.create() self.procSubscription.acquireFiles() self.mergeSubscription = Subscription(fileset=unmergedFileset, workflow=mergeWorkflow, split_algo="WMBSMergeBySize", type="Merge") self.mergeSubscription.create() self.procJobGroup = JobGroup(subscription=self.procSubscription) self.procJobGroup.create() self.mergeJobGroup = JobGroup(subscription=self.mergeSubscription) self.mergeJobGroup.create() self.testJob = Job(name="testJob", files=[inputFile]) self.testJob.create(group=self.procJobGroup) self.testJob["state"] = "complete" myThread = threading.currentThread() self.daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.stateChangeAction = self.daofactory(classname="Jobs.ChangeState") self.setFWJRAction = self.daofactory(classname="Jobs.SetFWJRPath") self.getJobTypeAction = self.daofactory(classname="Jobs.GetType") locationAction = self.daofactory(classname="Locations.New") locationAction.execute(siteName="cmssrm.fnal.gov") self.stateChangeAction.execute(jobs=[self.testJob]) self.tempDir = tempfile.mkdtemp() return
def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema( customModules=["WMComponent.DBS3Buffer", "T0.WMBS"]) self.splitterFactory = SplitterFactory(package="T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) wmbsDaoFactory = DAOFactory(package="WMCore.WMBS", logger=logging, dbinterface=myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state, state_time) VALUES (1, 'SomeSite', 1, 1) """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_pnns (id, pnn) VALUES (2, 'SomePNN') """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 2) """, transaction=False) insertRunDAO = daoFactory(classname="RunConfig.InsertRun") insertRunDAO.execute(binds={ 'RUN': 1, 'HLTKEY': "someHLTKey" }, transaction=False) insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection") insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False) insertStreamDAO = daoFactory(classname="RunConfig.InsertStream") insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False) insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "Express", "TestFileset1") insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer") insertStreamerDAO.execute(streamerPNN="SomePNN", binds={ 'RUN': 1, 'P5_ID': 1, 'LUMI': 1, 'STREAM': "Express", 'TIME': int(time.time()), 'LFN': "/streamer", 'FILESIZE': 0, 'EVENTS': 0 }, transaction=False) insertPromptCalibrationDAO = daoFactory( classname="RunConfig.InsertPromptCalibration") insertPromptCalibrationDAO.execute( { 'RUN': 1, 'STREAM': "Express", 'NUM_PRODUCER': 1 }, transaction=False) self.markPromptCalibrationFinishedDAO = daoFactory( classname="ConditionUpload.MarkPromptCalibrationFinished") self.fileset1 = Fileset(name="TestFileset1") self.fileset1.create() workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset=self.fileset1, workflow=workflow1, split_algo="Condition", type="Condition") self.subscription1.create() # set parentage chain and sqlite fileset alcaRecoFile = File("/alcareco", size=0, events=0) alcaRecoFile.addRun(Run(1, *[1])) alcaRecoFile.setLocation("SomePNN", immediateSave=False) alcaRecoFile.create() alcaPromptFile = File("/alcaprompt", size=0, events=0) alcaPromptFile.addRun(Run(1, *[1])) alcaPromptFile.setLocation("SomePNN", immediateSave=False) alcaPromptFile.create() sqliteFile = File("/sqlite", size=0, events=0) sqliteFile.create() self.fileset1.addFile(sqliteFile) self.fileset1.commit() results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details """, transaction=False)[0].fetchall() setParentageDAO = wmbsDaoFactory(classname="Files.SetParentage") setParentageDAO.execute(binds=[{ 'parent': "/streamer", 'child': "/alcareco" }, { 'parent': "/alcareco", 'child': "/alcaprompt" }, { 'parent': "/alcaprompt", 'child': "/sqlite" }], transaction=False) # default split parameters self.splitArgs = {} self.splitArgs['runNumber'] = 1 self.splitArgs['streamName'] = "Express" return
def populateWMBS(self): """ _populateWMBS_ Create files and subscriptions in WMBS """ myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute(siteName='s1', pnn="T1_US_FNAL_Disk") locationAction.execute(siteName='s2', pnn="T2_CH_CERN") self.validLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"] self.multipleFileFileset = Fileset(name="TestFileset1") self.multipleFileFileset.create() parentFile = File('/parent/lfn/', size=1000, events=100, locations=set(["T1_US_FNAL_Disk"])) parentFile.create() for _ in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T1_US_FNAL_Disk"])) newFile.create() newFile.addParent(lfn=parentFile['lfn']) self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name="TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size=1000, events=100, locations=set(["T1_US_FNAL_Disk"])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleSiteFileset = Fileset(name="TestFileset3") self.multipleSiteFileset.create() for _ in range(5): newFile = File(makeUUID(), size=1000, events=100) newFile.setLocation("T1_US_FNAL_Disk") newFile.create() self.multipleSiteFileset.addFile(newFile) for _ in range(5): newFile = File(makeUUID(), size=1000, events=100) newFile.setLocation(["T1_US_FNAL_Disk", "T2_CH_CERN"]) newFile.create() self.multipleSiteFileset.addFile(newFile) self.multipleSiteFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription( fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") self.multipleFileSubscription.create() self.singleFileSubscription = Subscription( fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") self.singleFileSubscription.create() self.multipleSiteSubscription = Subscription( fileset=self.multipleSiteFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") self.multipleSiteSubscription.create() return
def __init__(self, config): """ _init_ """ BaseWorkerThread.__init__(self) myThread = threading.currentThread() self.daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile self.specDirectory = config.Tier0Feeder.specDirectory self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None) self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None) self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None) self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None) self.localRequestCouchDB = RequestDBWriter( config.AnalyticsDataCollector.localT0RequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) self.injectedRuns = set() hltConfConnectUrl = config.HLTConfDatabase.connectUrl dbFactoryHltConf = DBFactory(logging, dburl=hltConfConnectUrl, options={}) self.dbInterfaceHltConf = dbFactoryHltConf.connect() daoFactoryHltConf = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=self.dbInterfaceHltConf) self.getHLTConfigDAO = daoFactoryHltConf( classname="RunConfig.GetHLTConfig") storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl dbFactoryStorageManager = DBFactory(logging, dburl=storageManagerConnectUrl, options={}) self.dbInterfaceStorageManager = dbFactoryStorageManager.connect() self.dbInterfaceSMNotify = None if hasattr(config, "SMNotifyDatabase"): smNotifyConnectUrl = config.SMNotifyDatabase.connectUrl dbFactorySMNotify = DBFactory(logging, dburl=smNotifyConnectUrl, options={}) self.dbInterfaceSMNotify = dbFactorySMNotify.connect() self.getExpressReadyRunsDAO = None if hasattr(config, "PopConLogDatabase"): popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None) if popConLogConnectUrl != None: dbFactoryPopConLog = DBFactory(logging, dburl=popConLogConnectUrl, options={}) dbInterfacePopConLog = dbFactoryPopConLog.connect() daoFactoryPopConLog = DAOFactory( package="T0.WMBS", logger=logging, dbinterface=dbInterfacePopConLog) self.getExpressReadyRunsDAO = daoFactoryPopConLog( classname="Tier0Feeder.GetExpressReadyRuns") self.haveT0DataSvc = False if hasattr(config, "T0DataSvcDatabase"): t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None) if t0datasvcConnectUrl != None: self.haveT0DataSvc = True dbFactoryT0DataSvc = DBFactory(logging, dburl=t0datasvcConnectUrl, options={}) dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect() self.daoFactoryT0DataSvc = DAOFactory( package="T0.WMBS", logger=logging, dbinterface=dbInterfaceT0DataSvc) # # Set deployment ID # SetDeploymentIdDAO = self.daoFactory( classname="Tier0Feeder.SetDeploymentID") GetDeploymentIdDAO = self.daoFactory( classname="Tier0Feeder.GetDeploymentID") try: self.deployID = GetDeploymentIdDAO.execute() if self.deployID == 0: self.deployID = int( datetime.datetime.now().strftime("%y%m%d%H%M%S")) SetDeploymentIdDAO.execute(self.deployID) except: logging.exception( "Something went wrong with setting deployment ID") raise return
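# Hedged sketch of the deployment-ID handling at the end of the __init__ above:
# read a persisted ID, and only if none exists yet (0) mint a new one from the
# current timestamp and store it, so later restarts reuse the same value. A
# plain dict stands in for the Tier0Feeder Get/SetDeploymentID DAOs.
import datetime

def get_or_set_deploy_id(store):
    deploy_id = store.get("deploy_id", 0)
    if deploy_id == 0:
        deploy_id = int(datetime.datetime.now().strftime("%y%m%d%H%M%S"))
        store["deploy_id"] = deploy_id
    return deploy_id

store = {}
first = get_or_set_deploy_id(store)
second = get_or_set_deploy_id(store)
assert first == second  # subsequent calls reuse the persisted ID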
def setUp(self): """ _setUp_ Setup the database connections and schema. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute("T2_CH_CERN", pnn="T2_CH_CERN") locationAction.execute("T1_US_FNAL", pnn="T1_US_FNAL_Disk") self.testFilesetA = Fileset(name="FilesetA") self.testFilesetA.create() self.testFilesetB = Fileset(name="FilesetB") self.testFilesetB.create() self.testFileA = File("testFileA", size=1000, events=100, locations=set(["T2_CH_CERN"])) self.testFileA.create() self.testFileB = File("testFileB", size=1000, events=100, locations=set(["T2_CH_CERN"])) self.testFileB.create() self.testFileC = File("testFileC", size=1000, events=100, locations=set(["T2_CH_CERN"])) self.testFileC.create() self.testFilesetA.addFile(self.testFileA) self.testFilesetA.addFile(self.testFileB) self.testFilesetA.addFile(self.testFileC) self.testFilesetA.commit() self.testFileD = File("testFileD", size=1000, events=100, locations=set(["T2_CH_CERN"])) self.testFileD.create() self.testFileE = File("testFileE", size=1000, events=100, locations=set(["T2_CH_CERN"])) self.testFileE.create() self.testFileF = File("testFileF", size=1000, events=100, locations=set(["T2_CH_CERN"])) self.testFileF.create() self.testFilesetB.addFile(self.testFileD) self.testFilesetB.addFile(self.testFileE) self.testFilesetB.addFile(self.testFileF) self.testFilesetB.commit() testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test") testWorkflowA.create() testWorkflowB = Workflow(spec="specB.xml", owner="Steve", name="wfB", task="Test") testWorkflowB.create() testWorkflowC = Workflow(spec="specC.xml", owner="Steve", name="wfC", task="Test") testWorkflowC.create() testWorkflowD = Workflow(spec="specD.xml", owner="Steve", name="wfD", task="Test") testWorkflowD.create() self.testSubscriptionA = Subscription(fileset=self.testFilesetA, workflow=testWorkflowA, split_algo="FileBased", type="Processing") self.testSubscriptionA.create() self.testSubscriptionB = Subscription(fileset=self.testFilesetB, workflow=testWorkflowB, split_algo="FileBased", type="Processing") self.testSubscriptionB.create() self.testSubscriptionC = Subscription(fileset=self.testFilesetB, workflow=testWorkflowC, split_algo="FileBased", type="Processing") self.testSubscriptionC.create() self.testSubscriptionD = Subscription(fileset=self.testFilesetB, workflow=testWorkflowD, split_algo="FileBased", type="Processing") self.testSubscriptionD.create() deleteWorkflow = Workflow(spec="specE.xml", owner="Steve", name="wfE", task="Test") deleteWorkflow.create() self.deleteSubscriptionA = Subscription( fileset=self.testFilesetA, workflow=deleteWorkflow, split_algo="SiblingProcessingBased", type="Cleanup") self.deleteSubscriptionA.create() self.deleteSubscriptionB = Subscription( fileset=self.testFilesetB, workflow=deleteWorkflow, split_algo="SiblingProcessingBased", type="Cleanup") self.deleteSubscriptionB.create() return