def testSubscription(self):
    """
    _testSubscription_

    Verify that the subscription API works.
    """
    datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
    datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

    xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
    self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
    xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
    self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

    testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS", "Saturn")
    xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                               testSub.getDatasetPaths())
    result = self.phedexApi.subscribe(testSub, xmlData)

    requestIDs = result["phedex"]["request_created"]
    self.assertEqual(len(requestIDs), 1,
                     "Error: Wrong number of request IDs")
    self.assertTrue("id" in requestIDs[0],
                    "Error: Missing request ID")
    return
def setUp(self):
    """
    _setUp_

    Install the DBSBuffer schema into the database and connect to PhEDEx.
    """
    self.phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
    self.dbsURL = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"

    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection(destroyAllDatabase = True)
    self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer"],
                            useDefault = False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                            logger = myThread.logger,
                            dbinterface = myThread.dbi)

    locationAction = daofactory(classname = "DBSBufferFiles.AddLocation")
    locationAction.execute(siteName = "srm-cms.cern.ch")
    locationAction.execute(siteName = "se.fnal.gov")

    self.testFilesA = []
    self.testFilesB = []
    self.testDatasetA = "/%s/PromptReco-v1/RECO" % makeUUID()
    self.testDatasetB = "/%s/CRUZET11-v1/RAW" % makeUUID()

    self.phedex = PhEDEx({"endpoint": self.phedexURL}, "json")
    return
def testLotsOfAncestors(self):
    """
    _testLotsOfAncestors_

    Create a file with 15 parents with each parent having 100 parents to
    verify that the query to return grandparents works correctly.
    """
    raise nose.SkipTest
    testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10,
                     checksums = {"cksum": "1"},
                     locations = "se1.fnal.gov")
    testFileA.create()

    for i in xrange(15):
        testParent = File(lfn = makeUUID(), size = 1024, events = 10,
                          checksums = {"cksum": "1"},
                          locations = "se1.fnal.gov")
        testParent.create()
        testFileA.addParent(testParent["lfn"])

        for j in xrange(100):
            testGParent = File(lfn = makeUUID(), size = 1024, events = 10,
                               checksums = {"cksum": "1"},
                               locations = "se1.fnal.gov")
            testGParent.create()
            testParent.addParent(testGParent["lfn"])

    assert len(testFileA.getAncestors(level = 2, type = "lfn")) == 1500, \
        "ERROR: Incorrect grandparents returned"
    return
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                    bl=[], wl=[], taskType='Processing', name=None):
    """
    _createJobGroups_

    Creates a series of jobGroups for submissions
    """
    jobGroupList = []

    if name is None:
        name = makeUUID()

    testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                            name=name, task="basicWorkload/Production")
    testWorkflow.create()

    # Create subscriptions
    for _ in range(nSubs):
        name = makeUUID()

        # Create Fileset, Subscription, jobGroup
        testFileset = Fileset(name=name)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type=taskType,
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create jobs
        self.makeNJobs(name=name, task=task, nJobs=nJobs,
                       jobGroup=testJobGroup, fileset=testFileset,
                       sub=testSubscription.exists(), site=site,
                       bl=bl, wl=wl)

        testFileset.commit()
        testJobGroup.commit()
        jobGroupList.append(testJobGroup)

    return jobGroupList
def create(self):
    """
    Add the new jobgroup to WMBS, create the output Fileset object
    """
    myThread = threading.currentThread()
    existingTransaction = self.beginTransaction()

    # Overwrite base class self.output for WMBS fileset
    self.output = Fileset(name=makeUUID())
    self.output.create()

    if self.uid is None:
        self.uid = makeUUID()

    action = self.daofactory(classname="JobGroup.New")
    action.execute(self.uid, self.subscription["id"], self.output.id,
                   conn=self.getDBConn(),
                   transaction=self.existingTransaction())

    self.id = self.exists()
    self.commitTransaction(existingTransaction)
    return
def testTime(self):
    nUIDs = 100000
    startTime = time.clock()
    for _ in range(nUIDs):
        makeUUID()
    print("We can make %i UUIDs in %f seconds" % (nUIDs, time.clock() - startTime))
def createTestJob(self, subscriptionType="Merge"):
    """
    _createTestJob_

    Create a test job with two files as input.  This will also create the
    appropriate workflow, jobgroup and subscription.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="Simon",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type=subscriptionType)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[46]))
    testFileA.create()
    testFileB.create()

    testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
    testJob["couch_record"] = "somecouchrecord"
    testJob["location"] = "test.site.ch"
    testJob.create(group=testJobGroup)
    testJob.associateFiles()
    return testJob
def test_AutoIncrementCheck(self):
    """
    _AutoIncrementCheck_

    Test and see whether we can find and set the auto_increment values
    """
    myThread = threading.currentThread()
    if myThread.dialect.lower() != "mysql":
        return

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
    incrementDAO.execute()

    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 1)

    incrementDAO.execute()
    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 2)

    incrementDAO.execute(input=10)
    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 11)

    incrementDAO.execute(input=5)
    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 12)
    return
def testMask(self):
    """
    _testMask_

    Test the new mask setup
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job()
    testJob["mask"].addRunAndLumis(run=100, lumis=[101, 102])
    testJob["mask"].addRunAndLumis(run=200, lumis=[201, 202])
    testJob.create(group=testJobGroup)

    loadJob = Job(id=testJob.exists())
    loadJob.loadData()

    runs = loadJob["mask"].getRunAndLumis()
    self.assertEqual(len(runs), 2)
    self.assertEqual(runs[100], [[101, 102]])
    self.assertEqual(runs[200], [[201, 202]])

    bigRun = Run(100, *[101, 102, 103, 104])
    badRun = Run(300, *[1001, 1002])
    result = loadJob["mask"].filterRunLumisByMask([bigRun, badRun])

    self.assertEqual(len(result), 1)
    alteredRun = result.pop()
    self.assertEqual(alteredRun.run, 100)
    self.assertEqual(alteredRun.lumis, [101, 102])

    run0 = Run(300, *[1001, 1002])
    run1 = Run(300, *[1001, 1002])
    loadJob["mask"].filterRunLumisByMask([run0, run1])
    return
def createFilesWithChildren(self, moreParentFiles, acqEra):
    """
    _createFilesWithChildren_

    Create several parentless files and then create child files.
    """
    parentFiles = []
    childFiles = []

    baseLFN = "/store/data/%s/Cosmics/RAW/v1/000/143/316/" % (acqEra)
    for i in range(10):
        testFile = DBSBufferFile(lfn = baseLFN + makeUUID() + ".root",
                                 size = 1024, events = 20,
                                 checksums = {"cksum": 1})
        testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_3_1_1",
                              appFam = "RAW", psetHash = "GIBBERISH",
                              configContent = "MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/%s-v1/RAW" % (acqEra))

        lumis = []
        for j in range(10):
            lumis.append((i * 10) + j)
        testFile.addRun(Run(143316, *lumis))

        testFile.setAcquisitionEra(acqEra)
        testFile.setProcessingVer("1")
        testFile.setGlobalTag("START54::All")
        testFile.create()
        testFile.setLocation("malpaquet")
        parentFiles.append(testFile)

    baseLFN = "/store/data/%s/Cosmics/RECO/v1/000/143/316/" % (acqEra)
    for i in range(5):
        testFile = DBSBufferFile(lfn = baseLFN + makeUUID() + ".root",
                                 size = 1024, events = 20,
                                 checksums = {"cksum": 1})
        testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_3_1_1",
                              appFam = "RECO", psetHash = "GIBBERISH",
                              configContent = "MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/%s-v1/RECO" % (acqEra))

        lumis = []
        for j in range(20):
            lumis.append((i * 20) + j)
        testFile.addRun(Run(143316, *lumis))

        testFile.setAcquisitionEra(acqEra)
        testFile.setProcessingVer("1")
        testFile.setGlobalTag("START54::All")
        testFile.create()
        testFile.setLocation("malpaquet")
        testFile.addParents([parentFiles[i * 2]["lfn"],
                             parentFiles[i * 2 + 1]["lfn"]])
        testFile.addParents([moreParentFiles[i * 2]["lfn"],
                             moreParentFiles[i * 2 + 1]["lfn"]])
        childFiles.append(testFile)

    return (parentFiles, childFiles)
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None,
                    bl=[], wl=[]):
    """
    Creates a series of jobGroups for submissions
    """
    jobGroupList = []

    testWorkflow = Workflow(spec=workloadSpec, owner="mnorman",
                            name=makeUUID(),
                            task="basicWorkload/Production",
                            owner_vogroup='phgroup',
                            owner_vorole='cmsrole')
    testWorkflow.create()

    # Create subscriptions
    for i in range(nSubs):
        name = makeUUID()

        # Create Fileset, Subscription, jobGroup
        testFileset = Fileset(name=name)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create jobs
        self.makeNJobs(name=name, task=task, nJobs=nJobs,
                       jobGroup=testJobGroup, fileset=testFileset,
                       sub=testSubscription.exists(), site=site,
                       bl=bl, wl=wl)

        testFileset.commit()
        testJobGroup.commit()
        jobGroupList.append(testJobGroup)

    return jobGroupList
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.multipleFileFileset = Fileset(name="TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.addRun(Run(i, *[45 + i]))
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name="TestFileset2")
    newFile = File("/some/file/name", size=1000, events=100)
    newFile.addRun(Run(1, *[45]))
    self.singleFileFileset.addFile(newFile)

    self.multipleFileLumiset = Fileset(name="TestFileset3")
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.addRun(Run(1, *[45 + i / 3]))
        self.multipleFileLumiset.addFile(newFile)

    self.singleLumiFileset = Fileset(name="TestFileset4")
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.addRun(Run(1, *[45]))
        self.singleLumiFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="FixedDelay",
        type="Processing")
    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="FixedDelay",
        type="Processing")
    self.multipleLumiSubscription = Subscription(
        fileset=self.multipleFileLumiset,
        workflow=testWorkflow,
        split_algo="FixedDelay",
        type="Processing")
    self.singleLumiSubscription = Subscription(
        fileset=self.singleLumiFileset,
        workflow=testWorkflow,
        split_algo="FixedDelay",
        type="Processing")
    return
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None,
                    bl=[], wl=[]):
    """
    Creates a series of jobGroups for submissions
    """
    jobGroupList = []

    testWorkflow = Workflow(spec=workloadSpec,
                            owner="tapas",
                            name=makeUUID(),
                            task="basicWorkload/Production",
                            owner_vogroup="phgroup",
                            owner_vorole="cmsrole")
    testWorkflow.create()

    # Create subscriptions
    for i in range(nSubs):
        name = makeUUID()

        # Create Fileset, Subscription, jobGroup
        testFileset = Fileset(name=name)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create jobs
        self.makeNJobs(name=name,
                       task=task,
                       nJobs=nJobs,
                       jobGroup=testJobGroup,
                       fileset=testFileset,
                       sub=testSubscription.exists(),
                       site=site,
                       bl=bl,
                       wl=wl)

        testFileset.commit()
        testJobGroup.commit()
        jobGroupList.append(testJobGroup)

    return jobGroupList
def getFiles(self, name, tier, nFiles=12, site="malpaquet", nLumis=1):
    """
    _getFiles_

    Create some dummy test files.
    """
    files = []

    (acqEra, procVer) = name.split("-")
    baseLFN = "/store/data/%s/Cosmics/RECO/%s/000/143/316/" % (acqEra, procVer)
    for f in range(nFiles):
        testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root",
                                 size=1024, events=20,
                                 checksums={"cksum": 1})
        testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                              appFam="RECO", psetHash="GIBBERISH",
                              configContent="MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/%s-%s/RECO" % (acqEra, procVer))

        lumis = []
        for i in range(nLumis):
            lumis.append((f * 1000000) + i)
        testFile.addRun(Run(1, *lumis))

        testFile.setAcquisitionEra(acqEra)
        testFile.setProcessingVer("0")
        testFile.setGlobalTag("START54::All")
        testFile.create()
        testFile.setLocation(site)
        files.append(testFile)

    baseLFN = "/store/data/%s/Cosmics/RAW-RECO/%s/000/143/316/" % (acqEra, procVer)
    testFileChild = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root",
                                  size=1024, events=10,
                                  checksums={'cksum': 1})
    testFileChild.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                               appFam="RAW-RECO", psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
    testFileChild.setDatasetPath("/Cosmics/%s-%s/RAW-RECO" % (acqEra, procVer))
    testFileChild.addRun(Run(1, *[45]))
    testFileChild.create()
    testFileChild.setLocation(site)

    testFileChild.addParents([x['lfn'] for x in files])

    return files
def createResubmitSpec(self, serverUrl, couchDB):
    """
    _createResubmitSpec_

    Create a bogus resubmit workload.
    """
    self.site = "cmssrm.fnal.gov"
    workload = WMWorkloadHelper(WMWorkload("TestWorkload"))
    reco = workload.newTask("reco")
    workload.setOwnerDetails(name = "evansde77", group = "DMWM")

    # first task uses the input dataset
    reco.addInputDataset(primary = "PRIMARY", processed = "processed-v1",
                         tier = "TIER1")
    reco.data.input.splitting.algorithm = "File"
    reco.setTaskType("Processing")
    cmsRunReco = reco.makeStep("cmsRun1")
    cmsRunReco.setStepType("CMSSW")
    reco.applyTemplates()
    cmsRunRecoHelper = cmsRunReco.getTypeHelper()
    cmsRunRecoHelper.addOutputModule("outputRECO",
                                     primaryDataset = "PRIMARY",
                                     processedDataset = "processed-v2",
                                     dataTier = "TIER2",
                                     lfnBase = "/store/dunkindonuts",
                                     mergedLFNBase = "/store/kfc")

    dcs = DataCollectionService(url = serverUrl, database = couchDB)

    def getJob(workload):
        job = Job()
        job["task"] = workload.getTask("reco").getPathName()
        job["workflow"] = workload.name()
        job["location"] = self.site
        job["owner"] = "evansde77"
        job["group"] = "DMWM"
        return job

    testFileA = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
    testFileA.setLocation([self.site])
    testFileA.addRun(Run(1, 1, 2))
    testFileB = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
    testFileB.setLocation([self.site])
    testFileB.addRun(Run(1, 3, 4))
    testJobA = getJob(workload)
    testJobA.addFile(testFileA)
    testJobA.addFile(testFileB)

    dcs.failedJobs([testJobA])

    topLevelTask = workload.getTopLevelTask()[0]
    workload.truncate("Resubmit_TestWorkload", topLevelTask.getPathName(),
                      serverUrl, couchDB)

    return workload
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.multipleFileFileset = Fileset(name = "TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(i, *[45+i]))
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name = "TestFileset2")
    newFile = File("/some/file/name", size = 1000, events = 100,
                   locations = set(["somese.cern.ch"]))
    newFile.addRun(Run(1, *[45]))
    self.singleFileFileset.addFile(newFile)

    self.multipleFileLumiset = Fileset(name = "TestFileset3")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45+i/3]))
        self.multipleFileLumiset.addFile(newFile)

    self.singleLumiFileset = Fileset(name = "TestFileset4")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        self.singleLumiFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                 workflow = testWorkflow,
                                                 split_algo = "EndOfRun",
                                                 type = "Processing")
    self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                               workflow = testWorkflow,
                                               split_algo = "EndOfRun",
                                               type = "Processing")
    self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset,
                                                 workflow = testWorkflow,
                                                 split_algo = "EndOfRun",
                                                 type = "Processing")
    self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset,
                                               workflow = testWorkflow,
                                               split_algo = "EndOfRun",
                                               type = "Processing")
    return
def createFile():
    """
    _createFile_

    Create a file with some random metadata.
    """
    newFile = File(lfn = makeUUID(),
                   size = random.randrange(1024, 1048576, 1024),
                   events = random.randrange(10, 100000, 50),
                   parents = [File(lfn = makeUUID())],
                   locations = makeUUID())
    newFile["first_event"] = 0
    newFile["last_event"] = 0
    newFile["id"] = 1

    return newFile
def testListRunningJobs(self):
    """
    _testListRunningJobs_

    Test the ListRunningJobs DAO.
    """
    testWorkflow = Workflow(spec = makeUUID(), owner = "Steve",
                            name = makeUUID(), task="Test")
    testWorkflow.create()

    testFileset = Fileset(name = "TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = testWorkflow,
                                    type = "Processing")
    testSubscription.create()

    testJobGroup = JobGroup(subscription = testSubscription)
    testJobGroup.create()

    testJobA = Job(name = makeUUID(), files = [])
    testJobA["couch_record"] = makeUUID()
    testJobA.create(group = testJobGroup)
    testJobA["state"] = "executing"

    testJobB = Job(name = makeUUID(), files = [])
    testJobB["couch_record"] = makeUUID()
    testJobB.create(group = testJobGroup)
    testJobB["state"] = "complete"

    testJobC = Job(name = makeUUID(), files = [])
    testJobC["couch_record"] = makeUUID()
    testJobC.create(group = testJobGroup)
    testJobC["state"] = "new"

    changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
    changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

    runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
    runningJobs = runningJobsAction.execute()

    assert len(runningJobs) == 2, \
        "Error: Wrong number of running jobs returned."

    for runningJob in runningJobs:
        if runningJob["job_name"] == testJobA["name"]:
            assert runningJob["state"] == testJobA["state"], \
                "Error: Running job has wrong state."
            assert runningJob["couch_record"] == testJobA["couch_record"], \
                "Error: Running job has wrong couch record."
        else:
            assert runningJob["job_name"] == testJobC["name"], \
                "Error: Running job has wrong name."
            assert runningJob["state"] == testJobC["state"], \
                "Error: Running job has wrong state."
            assert runningJob["couch_record"] == testJobC["couch_record"], \
                "Error: Running job has wrong couch record."

    return
class FileAndEventBased(JobFactory):
    """
    Split jobs by number of events
    """
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event based splitting algorithm.  All available files are split
        into a set number of events per job.
        """
        jobGroups = []
        fileset = self.subscription.availableFiles()

        #  //
        # // get the event total
        #//
        eventsPerJob = int(kwargs.get("events_per_job", 5000))
        selectionAlgorithm = kwargs.get('selection_algorithm', None)

        carryOver = 0
        for f in fileset:
            if selectionAlgorithm:
                if not selectionAlgorithm(f):
                    self.subscription.completeFiles([f])
                    continue
            self.newGroup()

            eventsInFile = int(f["events"])
            if eventsInFile == 0:
                self.newJob(name=makeUUID())
                self.currentJob.addFile(f)
                self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob, 0)
                continue

            currentEvent = 0
            while currentEvent < eventsInFile:
                self.newJob(name=makeUUID())
                self.currentJob.addFile(f)
                self.currentJob["mask"].setMaxAndSkipEvents(
                    eventsPerJob, currentEvent)
                currentEvent += eventsPerJob

        return
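# A minimal standalone sketch of the event-splitting arithmetic above,
# independent of WMBS (plain Python, hypothetical inputs): a file with 12000
# events and events_per_job=5000 yields three jobs whose masks carry
# (maxEvents, skipEvents) of (5000, 0), (5000, 5000) and (5000, 10000).
def sketchEventSplit(eventsInFile, eventsPerJob):
    """Return the (maxEvents, skipEvents) mask for each job."""
    masks = []
    currentEvent = 0
    while currentEvent < eventsInFile:
        masks.append((eventsPerJob, currentEvent))
        currentEvent += eventsPerJob
    return masks

assert sketchEventSplit(12000, 5000) == [(5000, 0), (5000, 5000), (5000, 10000)]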
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.multipleFileFileset = Fileset(name = "TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100)
        newFile.setLocation('blenheim')
        newFile.setLocation('malpaquet')
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name = "TestFileset2")
    newFile = File("/some/file/name", size = 1000, events = 100)
    newFile.setLocation('blenheim')
    self.singleFileFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                 workflow = testWorkflow,
                                                 split_algo = "FileBased",
                                                 type = "Processing")
    self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                               workflow = testWorkflow,
                                               split_algo = "FileBased",
                                               type = "Processing")
    #self.multipleFileSubscription.create()
    #self.singleFileSubscription.create()
    return
def getBlock(self, newFile, dasBlocks, location, das):
    """
    _getBlock_

    This gets a new block by checking whether there is a pre-existing block.
    """
    # Iterate over a copy since blocks may be removed along the way
    for block in list(dasBlocks):
        if not self.isBlockOpen(newFile = newFile, block = block):
            # Then the block can't fit the file
            # Close the block
            block.status = 'Pending'
            self.blockCache[block.getName()] = block
            dasBlocks.remove(block)
        else:
            # Load it out of the cache
            currentBlock = self.blockCache.get(block.getName())
            return currentBlock

    # If there are no open blocks
    # Or we run out of blocks
    blockname = '%s#%s' % (newFile['datasetPath'], makeUUID())
    newBlock = DBSBlock(name = blockname, location = location, das = das)
    self.addNewBlock(block = newBlock)
    dasBlocks.append(newBlock)
    return newBlock
def main():
    """main function for testing"""
    from WMCore.DataStructs.Job import Job
    from WMCore.DataStructs.File import File
    from WMCore.DataStructs.Run import Run
    from WMCore.DataStructs.JobPackage import JobPackage
    from WMCore.Services.UUID import makeUUID
    from WMCore.WMSpec.Makers.TaskMaker import TaskMaker

    factory = HarvestingWorkloadFactory()
    workload = factory("derp", getTestArguments())

    task = workload.getTask('Harvesting')

    job = Job("SampleJob")
    job["id"] = makeUUID()
    job["task"] = task.getPathName()
    job["workflow"] = workload.name()

    inputFile = File(lfn="/store/relval/CMSSW_3_8_2/RelValMinBias/GEN-SIM-RECO/MC_38Y_V9-v1/0019/FEC5BB4D-BFAF-DF11-A52A-001A92810AD2.root")
    job.addFile(inputFile)

    jpackage = JobPackage()
    jpackage[1] = job

    import pickle
    handle = open("%s/JobPackage.pkl" % os.getcwd(), 'w')
    pickle.dump(jpackage, handle)
    handle.close()

    taskMaker = TaskMaker(workload, os.getcwd())
    taskMaker.skipSubscription = True
    taskMaker.processWorkload()

    task.build(os.getcwd())
def getBlock(self, newFile, dasBlocks, location, das):
    """
    _getBlock_

    This gets a new block by checking whether there is a pre-existing block.
    """
    # Iterate over a copy since block names may be removed along the way
    for blockName in list(dasBlocks):
        block = self.blockCache.get(blockName)
        if not self.isBlockOpen(newFile=newFile, block=block):
            # Then the block can't fit the file
            # Close the block
            block.status = 'Pending'
            self.blockCache[blockName] = block
            dasBlocks.remove(blockName)
        else:
            # Load it out of the cache
            currentBlock = block
            return currentBlock

    # If there are no open blocks
    # Or we run out of blocks
    blockname = '%s#%s' % (newFile['datasetPath'], makeUUID())
    newBlock = DBSBlock(name=blockname, location=location, das=das)
    self.addNewBlock(block=newBlock)
    dasBlocks.append(blockname)
    return newBlock
def createLargeFileBlock(self):
    """
    _createLargeFileBlock_

    Creates a large group of files for testing
    """
    testFileset = Fileset(name = "TestFilesetX")
    testFileset.create()
    for i in range(5000):
        newFile = File(makeUUID(), size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.create()
        testFileset.addFile(newFile)
    testFileset.commit()

    testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman",
                            name = "wf003", task="Test")
    testWorkflow.create()

    largeSubscription = Subscription(fileset = testFileset,
                                     workflow = testWorkflow,
                                     split_algo = "FileBased",
                                     type = "Processing")
    largeSubscription.create()

    return largeSubscription
def testD_Profile(self):
    """
    _Profile_

    Profile with cProfile and time various pieces
    """
    return
    import cProfile
    import pstats

    config = self.createConfig()

    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = 500
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = '/%s/%s/%s' % (name, name, tier)

    testDBSUpload = DBSUploadPoller(config=config)
    cProfile.runctx("testDBSUpload.algorithm()", globals(), locals(),
                    filename="testStats.stat")

    p = pstats.Stats('testStats.stat')
    p.sort_stats('cumulative')
    p.print_stats(0.2)

    return
def testC_Profile(self):
    """
    _Profile_

    DON'T RUN THIS!
    """
    return
    import cProfile
    import pstats

    myThread = threading.currentThread()

    name = makeUUID()

    config = self.getConfig()

    jobList = self.createGiantJobSet(name = name, config = config,
                                     nSubs = 10, nJobs = 1000, nFiles = 10)

    cleanCouch = CleanCouchPoller(config = config)
    cleanCouch.setup()

    cProfile.runctx("cleanCouch.algorithm()", globals(), locals(),
                    filename = "testStats.stat")

    p = pstats.Stats('testStats.stat')
    p.sort_stats('cumulative')
    p.print_stats()

    return
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    Implement merge algorithm for the subscription provided
    """
    fileset = list(self.subscription.availableFiles())

    mergeSize = int(kwargs['merge_size'])
    overflow = bool(kwargs.get('all_files', False))
    fileset.sort()

    accumSize = 0
    jobFiles = Fileset()
    locationDict = self.sortByLocation()
    for location in locationDict:
        baseName = makeUUID()
        self.newGroup()
        for f in locationDict[location]:
            accumSize += f['size']
            jobFiles.addFile(f)
            if accumSize >= mergeSize:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                accumSize = 0
                jobFiles = Fileset()

        if len(jobFiles) > 0:
            if overflow:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
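# A minimal standalone sketch of the merge_size accumulation above (plain
# Python, no WMBS, hypothetical sizes): files are added in order and a job is
# cut as soon as the accumulated size reaches merge_size; a final undersized
# job is only cut when the all_files/overflow flag is set.
def sketchMergeBySize(sizes, mergeSize, overflow=False):
    """Group file sizes into jobs; returns a list of size-lists."""
    jobs, current, accum = [], [], 0
    for size in sizes:
        accum += size
        current.append(size)
        if accum >= mergeSize:
            jobs.append(current)
            current, accum = [], 0
    if current and overflow:
        jobs.append(current)
    return jobs

assert sketchMergeBySize([600, 600, 600], 1000) == [[600, 600]]
assert sketchMergeBySize([600, 600, 600], 1000, overflow=True) == [[600, 600], [600]]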
def test05(self):
    """
    _test05_

    Test repacking of multiple lumis with holes in the lumi sequence
    Multi lumi input
    """
    mySplitArgs = self.splitArgs.copy()

    insertClosedLumiBinds = []
    for lumi in [1, 2, 4]:
        filecount = 2
        for i in range(filecount):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.addRun(Run(1, *[lumi]))
            newFile.setLocation("SomeSE", immediateSave = False)
            newFile.create()
            self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append({'RUN': 1,
                                          'LUMI': lumi,
                                          'STREAM': "A",
                                          'FILECOUNT': filecount,
                                          'INSERT_TIME': self.currentTime,
                                          'CLOSE_TIME': self.currentTime})
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                     transaction = False)

    mySplitArgs['maxInputFiles'] = 5
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    self.insertClosedLumiDAO.execute(binds = {'RUN': 1,
                                              'LUMI': 3,
                                              'STREAM': "A",
                                              'FILECOUNT': 0,
                                              'INSERT_TIME': self.currentTime,
                                              'CLOSE_TIME': self.currentTime},
                                     transaction = False)

    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")

    self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")

    return
def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
    """
    _createJob_

    create an express job processing the passed in list of streamers
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

    for streamer in streamerList:
        f = File(id=streamer['id'], lfn=streamer['lfn'])
        f.setLocation(streamer['location'], immediateSave=False)
        self.currentJob.addFile(f)

    # job time based on
    #  - 5 min initialization
    #  - 0.5MB/s repack speed
    #  - 45s/evt reco speed
    #  - checksum calculation at 5MB/s
    #  - stageout at 5MB/s
    # job disk based on
    #  - streamer or RAW on local disk (factor 1)
    #  - FEVT/ALCARECO/DQM on local disk (factor 4)
    jobTime = 300 + jobSize / 500000 + jobEvents * 45 + (jobSize * 4 * 2) / 5000000
    self.currentJob.addResourceEstimates(jobTime=jobTime,
                                         disk=(jobSize * 5) / 1024,
                                         memory=memoryRequirement)

    return
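# A quick sanity check of the jobTime estimate above, under assumed inputs
# (hypothetical numbers, not from the source): a 1 GB input with 100 events
# gives 300s init + 2000s repack + 4500s reco + 1600s checksum/stageout.
jobSize = 1000000000   # assumed: 1 GB input
jobEvents = 100        # assumed event count
jobTime = 300 + jobSize / 500000 + jobEvents * 45 + (jobSize * 4 * 2) / 5000000
assert jobTime == 300 + 2000 + 4500 + 1600  # 8400 seconds total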
def getBlock(self, newFile, location, das, skipOpenCheck=False):
    """
    _getBlock_

    Retrieve a block if one exists and is open.  If no open block is
    found create and return a new one.
    """
    if das in self.dasCache and location in self.dasCache[das]:
        for blockName in self.dasCache[das][location]:
            block = self.blockCache.get(blockName)
            if not self.isBlockOpen(newFile=newFile, block=block) and not skipOpenCheck:
                # Block isn't open anymore. Mark it as pending so that it gets
                # uploaded.
                block.setPendingAndCloseBlock()
                self.blockCache[blockName] = block
            else:
                return block

    # A suitable open block does not exist.  Create a new one.
    blockname = "%s#%s" % (newFile["datasetPath"], makeUUID())
    newBlock = DBSBlock(name=blockname, location=location, das=das,
                        workflow=newFile["workflow"])
    self.addNewBlock(block=newBlock)
    return newBlock
def testFileset(self):
    """
    _testFileset_

    Verify that converting an ACDC fileset to a DataStructs fileset works
    correctly.
    """
    testCollection = CouchCollection(database = self.testInit.couchDbName,
                                     url = self.testInit.couchUrl,
                                     name = "Thunderstruck")
    testCollection.setOwner(self.owner)
    testFileset = CouchFileset(database = self.testInit.couchDbName,
                               url = self.testInit.couchUrl,
                               name = "TestFileset")
    testCollection.addFileset(testFileset)

    testFiles = {}
    for i in range(5):
        lfn = makeUUID()
        testFile = File(lfn = lfn, size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFiles[lfn] = testFile
        testFileset.add([testFile])

    for fsFile in testFileset.fileset().files:
        self.assertTrue(fsFile["lfn"] in testFiles,
                        "Error: File missing.")
        self.assertEqual(fsFile["events"], testFiles[fsFile["lfn"]]["events"],
                         "Error: Wrong number of events.")
        self.assertEqual(fsFile["size"], testFiles[fsFile["lfn"]]["size"],
                         "Error: Wrong file size.")
    return
def test05(self):
    """
    _test05_

    Test multi lumis express merges with holes
    """
    mySplitArgs = self.splitArgs.copy()

    for lumi in [1, 2, 4]:
        for i in range(2):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[lumi]))
            newFile.setLocation("SomePNN", immediateSave=False)
            newFile.create()
            self.fileset2.addFile(newFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    time.sleep(1)

    mySplitArgs['maxLatency'] = 1
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    return
def __call__(self):
    """
    __call__

    Generate some random data
    """
    # Generate somewhere between 10 and 2000 files
    name = "ThisIsATest_%s" % (makeUUID())
    nFiles = random.randint(10, 2000)
    name = name.replace('-', '_')
    name = '%s-v0' % name
    files = self.getFiles(name=name, nFiles=nFiles)
    print "Inserting %i files for dataset %s" % (nFiles * 2, name)

    try:
        self.dbsUploader.algorithm()
    except:
        self.dbsUploader.close()
        raise

    # Repeat just to make sure
    try:
        self.dbsUploader.algorithm()
    except:
        self.dbsUploader.close()
        raise

    return
def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement,
              numberOfCores = 1):
    """
    _createJob_

    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))
    if numberOfCores > 1:
        self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

    for streamer in streamerList:
        f = File(id = streamer['id'], lfn = streamer['lfn'])
        f.setLocation(streamer['location'], immediateSave = False)
        self.currentJob.addFile(f)

    # job time based on
    #  - 5 min initialization
    #  - 0.5MB/s repack speed
    #  - checksum calculation at 5MB/s
    #  - stageout at 5MB/s
    # job disk based on
    #  - RAW on local disk (factor 1)
    jobTime = 300 + jobSize/500000 + (jobSize*2)/5000000
    self.currentJob.addResourceEstimates(jobTime = jobTime,
                                         disk = jobSize/1024,
                                         memory = memoryRequirement)

    return
def testE_TestNonProxySplitting(self):
    """
    _TestNonProxySplitting_

    Test and see if we can split things without a proxy.
    """
    myThread = threading.currentThread()

    config = self.getConfig()
    config.JobCreator.workerThreads = 1

    name = makeUUID()
    workloadName = 'TestWorkload'

    workload = self.createWorkload(workloadName = workloadName)
    workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload',
                                'WMSandbox', 'WMWorkload.pkl')

    self.stuffWMBS(workflowURL = workloadPath, name = name)

    testJobCreator = JobCreatorPoller(config = config)

    testJobCreator.algorithm()

    getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), 1)

    result = getJobsAction.execute(state = 'Created', jobType = "Merge")
    self.assertEqual(len(result), 0)

    return
def getBlock(self, newFile, location, skipOpenCheck=False):
    """
    _getBlock_

    Retrieve a block if one exists with a matching datasetpath/location and
    is open.  If no such block is found create and return a new one.
    """
    datasetpath = newFile["datasetPath"]

    for block in self.blockCache.values():
        if datasetpath == block.getDatasetPath() and location == block.getLocation():
            if not self.isBlockOpen(newFile=newFile, block=block) and not skipOpenCheck:
                # Block isn't open anymore. Mark it as pending so that it gets uploaded.
                block.setPendingAndCloseBlock()
            else:
                return block

    # A suitable open block does not exist.  Create a new one.
    blockname = "%s#%s" % (datasetpath, makeUUID())
    newBlock = DBSBufferBlock(name=blockname,
                              location=location,
                              datasetpath=datasetpath)
    self.blockCache[blockname] = newBlock
    return newBlock
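# A standalone sketch of the open-block selection above, using plain dicts in
# place of DBSBufferBlock and uuid4 in place of makeUUID (hypothetical, for
# illustration): reuse an open block matching both dataset path and location,
# close stale matches along the way, and fall through to a fresh block.
import uuid

def sketchGetBlock(blockCache, datasetpath, location, isOpen):
    for block in blockCache.values():
        if block['datasetpath'] == datasetpath and block['location'] == location:
            if isOpen(block):
                return block
            block['status'] = 'Pending'  # close it so it gets uploaded
    name = '%s#%s' % (datasetpath, uuid.uuid4())
    newBlock = {'name': name, 'datasetpath': datasetpath,
                'location': location, 'status': 'Open'}
    blockCache[name] = newBlock
    return newBlock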
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    A time delay job splitting algorithm, will shove all unacquired files
    into a new job if the trigger_time has been passed
    """
    #  //
    # // get the fileset
    #//
    fileset = self.subscription.getFileset()
    trigger_time = int(kwargs['trigger_time'])

    if trigger_time < time.time():
        availFiles = self.subscription.availableFiles()
        if len(availFiles) == 0:
            # no files to acquire
            return []
        baseName = makeUUID()
        self.newGroup()
        self.newJob(name='%s-endofrun' % (baseName,))
        for f in availFiles:
            self.currentJob.addFile(f)
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    parentFile = File('%s_parent' % (baseName), size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for i in range(nFiles):
        newFile = File(lfn='%s_%i' % (baseName, i), size=1000,
                       events=100, locations="somese.cern.ch")
        lumis = []
        for lumi in range(lumisPerFile):
            if rand:
                lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
            else:
                lumis.append((100 * i) + lumi)
        newFile.addRun(Run(i, *lumis))
        newFile.create()
        newFile.addParent(parentFile['lfn'])
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = File(lfn='%s_%i_2' % (baseName, i), size=1000,
                           events=100, locations="otherse.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def testC_FailTest(self):
    """
    _FailTest_

    THIS TEST IS DANGEROUS!
    Figure out what happens when we trigger rollbacks
    """
    myThread = threading.currentThread()
    config = self.createConfig()
    config.DBSUpload.abortStepTwo = True

    originalOut = sys.stdout
    originalErr = sys.stderr

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)

    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = 12
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = "/%s/%s/%s" % (name, name, tier)

    testDBSUpload = DBSUploadPoller(config=config)

    try:
        testDBSUpload.algorithm()
    except Exception:
        pass
def testF_DBSUploadQueueSizeCheckForAlerts(self):
    """
    Test will not trigger a real alert being sent unless doing some
    mocking of the methods used during DBSUploadPoller.algorithm() ->
    DBSUploadPoller.uploadBlocks() method.
    As done here, it probably can't be deterministic, yet the feature
    shall be checked.
    """
    sizeLevelToTest = 1
    myThread = threading.currentThread()
    config = self.createConfig()
    # threshold / value to check
    config.DBSUpload.alertUploadQueueSize = sizeLevelToTest

    # without this uploadBlocks method returns immediately
    name = "ThisIsATest_%s" % (makeUUID())
    tier = "RECO"
    nFiles = sizeLevelToTest + 1
    files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
    datasetPath = "/%s/%s/%s" % (name, name, tier)

    # load components that are necessary to check status
    # (this seems necessary, else some previous tests started failing)
    factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
    dbinterface = factory.loadObject("UploadToDBS")

    dbsInterface = DBSInterface(config=config)
    localAPI = dbsInterface.getAPIRef()
    globeAPI = dbsInterface.getAPIRef(globalRef=True)
    testDBSUpload = DBSUploadPoller(config)
    # this is finally where the action (alert) should be triggered from
    testDBSUpload.algorithm()

    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False,
                       nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    for i in range(nFiles):
        newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, "somese.cern.ch")
        newFile.create()
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, "otherse.cern.ch")
            newFile.create()
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def createSubscription(self, nFiles, lumisPerFile, twoSites=False,
                       nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    for i in range(nFiles):
        newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, 'blenheim')
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'malpaquet')
            testFileset.addFile(newFile)

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    return testSubscription
def atestC_Profile(self):
    """
    _Profile_

    DON'T RUN THIS!
    """
    return
    import cProfile
    import pstats

    myThread = threading.currentThread()

    name = makeUUID()

    config = self.getConfig()

    jobList = self.createGiantJobSet(name=name, config=config,
                                     nSubs=10, nJobs=1000, nFiles=10)

    testTaskArchiver = TaskArchiverPoller(config=config)

    cProfile.runctx("testTaskArchiver.algorithm()", globals(), locals(),
                    filename="testStats.stat")

    p = pstats.Stats('testStats.stat')
    p.sort_stats('cumulative')
    p.print_stats()

    return
def create(self, group):
    """
    _create_

    Write the job to the database.
    """
    if self["id"] is not None:
        return

    existingTransaction = self.beginTransaction()

    self["jobgroup"] = group.id

    if self["name"] is None:
        self["name"] = makeUUID()

    jobAction = self.daofactory(classname="Jobs.New")
    jobAction.execute(jobgroup=self["jobgroup"], name=self["name"],
                      couch_record=self["couch_record"],
                      location=self["location"],
                      cache_dir=self['cache_dir'],
                      outcome=self['outcome'],
                      fwjr=self['fwjr'],
                      conn=self.getDBConn(),
                      transaction=self.existingTransaction())

    self.exists()
    self['mask'].save(jobID=self['id'])
    self.associateFiles()
    self.commitTransaction(existingTransaction)
    return
def createJob(self, streamerList, jobEvents, jobSize, timePerEvent,
              sizePerEvent, memoryRequirement):
    """
    _createJob_

    create an express job processing the passed in list of streamers
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

    for streamer in streamerList:
        f = File(id = streamer['id'], lfn = streamer['lfn'])
        f.setLocation(streamer['location'], immediateSave = False)
        self.currentJob.addFile(f)

    # job time based on
    #  - 5 min initialization (twice)
    #  - 0.5MB/s repack speed
    #  - reco with timePerEvent
    #  - checksum calculation at 5MB/s
    #  - stageout at 5MB/s
    # job disk based on
    #  - streamer or RAW on local disk (factor 1)
    #  - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
    jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
    self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                         disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000),
                                         memory = memoryRequirement)

    return
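# A quick check of the capped estimates above, under assumed inputs
# (hypothetical numbers, not from the source): the raw disk estimate exceeds
# the 20000000 cap so min() clamps it, while jobTime stays under the 47h cap.
jobSize, jobEvents = 1000000000, 1000      # assumed: 1 GB input, 1000 events
timePerEvent, sizePerEvent = 45, 1000000   # assumed reco time / output size per event
jobTime = 600 + jobSize / 500000 + jobEvents * timePerEvent + (jobEvents * sizePerEvent * 2) / 5000000
assert jobTime == 600 + 2000 + 45000 + 400               # 48000s < 47*3600s, not clamped
assert min(jobSize / 1024 + jobEvents * sizePerEvent, 20000000) == 20000000  # clamped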
def testUUID(self):
    listOfIDs = []
    splitID = None
    for i in range(0, 1000):
        tmpID = makeUUID()
        if not splitID:
            splitID = tmpID.split('-')
        tmpSplit = tmpID.split('-')
        self.assertEqual(tmpSplit[1], splitID[1],
                         "Second component of UUID not the same %s != %s"
                         % (tmpSplit[1], splitID[1]))
        self.assertEqual(tmpSplit[2], splitID[2],
                         "Third component of UUID not the same %s != %s"
                         % (tmpSplit[2], splitID[2]))
        self.assertEqual(tmpSplit[4], splitID[4],
                         "Fifth component of UUID not the same %s != %s"
                         % (tmpSplit[4], splitID[4]))
        self.assertEqual(type(tmpID), str)
        self.assertEqual(listOfIDs.count(tmpID), 0,
                         "UUID repeated! %s found in list %i times!"
                         % (tmpID, listOfIDs.count(tmpID)))
        listOfIDs.append(tmpID)
    return
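# A minimal sketch of what the test above asserts about makeUUID(): the IDs
# carry the usual five dash-separated UUID fields, and within one process the
# second, third, and fifth fields repeat while the full string never does.
# Assumes WMCore.Services.UUID.makeUUID is importable (as in main() above).
from WMCore.Services.UUID import makeUUID

first = makeUUID().split('-')
second = makeUUID().split('-')
assert len(first) == 5
assert (first[1], first[2], first[4]) == (second[1], second[2], second[4])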