def setupACDCDatabase(self, collectionName, taskPath, user, group):
    """
    _setupACDCDatabase_

    Populate an ACDC database with bogus records associated to certain
    collection name, user and task path.
    """
    acdcServer = CouchService(url = self.testInit.couchUrl,
                              database = "%s_acdc" % self.couchDBName)
    owner = acdcServer.newOwner(group, user)
    testCollection = CouchCollection(database = self.testInit.couchDbName,
                                     url = self.testInit.couchUrl,
                                     name = collectionName)
    testCollection.setOwner(owner)
    testFileset = CouchFileset(database = self.testInit.couchDbName,
                               url = self.testInit.couchUrl,
                               name = taskPath)
    testCollection.addFileset(testFileset)

    testFiles = []
    for _ in range(5):
        testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFiles.append(testFile)
    testFileset.add(testFiles)
def failedJobs(self, failedJobs, useMask = True):
    """
    _failedJobs_

    Given a list of failed jobs, sort them into Filesets and record them

    NOTE: jobs must have a non-standard task, workflow, owner and group
    attributes assigned to them.
    """
    for job in failedJobs:
        try:
            taskName = job['task']
            workflow = job['workflow']
        except KeyError as ex:
            msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
            logging.error(msg)
            raise ACDCDCSException(msg)

        coll = CouchCollection(database = self.database, url = self.url,
                               name = workflow,
                               type = CollectionTypes.DataCollection)
        owner = self.newOwner(job.get("group", "cmsdataops"),
                              job.get("owner", "cmsdataops"))
        coll.setOwner(owner)
        fileset = CouchFileset(database = self.database, url = self.url,
                               name = taskName)
        coll.addFileset(fileset)
        if useMask:
            fileset.add(files = job['input_files'], mask = job['mask'])
        else:
            fileset.add(files = job['input_files'])

    return
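# Hypothetical usage sketch (not part of the source): the minimal job record
# that the failedJobs() variant above expects. 'task' and 'workflow' are the
# required non-standard keys named in the docstring; 'owner'/'group' fall back
# to "cmsdataops" when absent. The service construction, URL, database name
# and all values below are illustrative assumptions, not code from this repo.
from WMCore.ACDC.DataCollectionService import DataCollectionService

dcs = DataCollectionService(url="http://localhost:5984", database="acdcserver")
failedJob = {"task": "/ExampleWorkflow/DataProcessing",
             "workflow": "ExampleWorkflow",
             "owner": "someuser",
             "group": "somegroup",
             "input_files": [],   # File records consumed by the failed job
             "mask": None}        # run/lumi mask; only read when useMask=True
dcs.failedJobs([failedJob])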
def testListFiles(self):
    """
    _testListFiles_

    Verify that the files iterator works correctly.
    """
    testCollection = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl, name="Thunderstruck")
    testCollection.setOwner(self.owner)
    testFileset = CouchFileset(database=self.testInit.couchDbName,
                               url=self.testInit.couchUrl, name="TestFileset")
    testCollection.addFileset(testFileset)

    testFiles = {}
    for i in range(5):
        lfn = makeUUID()
        testFile = File(lfn=lfn, size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFiles[lfn] = testFile
        testFileset.add([testFile])

    for file in testFileset.listFiles():
        self.assertTrue(file["lfn"] in testFiles.keys(),
                        "Error: File missing.")
        self.assertEqual(file["events"], testFiles[file["lfn"]]["events"],
                         "Error: Wrong number of events.")
        self.assertEqual(file["size"], testFiles[file["lfn"]]["size"],
                         "Error: Wrong file size.")
    return
def testFileset(self):
    """
    _testFileset_

    Verify that converting an ACDC fileset to a DataStructs fileset works
    correctly.
    """
    testCollection = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl, name="Thunderstruck")
    testFileset = CouchFileset(database=self.testInit.couchDbName,
                               url=self.testInit.couchUrl, name="TestFileset")
    testCollection.addFileset(testFileset)

    testFiles = {}
    for i in range(5):
        lfn = makeUUID()
        testFile = File(lfn=lfn, size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFiles[lfn] = testFile
        testFileset.add([testFile])

    for file in testFileset.fileset().files:
        self.assertTrue(file["lfn"] in testFiles,
                        "Error: File missing.")
        self.assertEqual(file["events"], testFiles[file["lfn"]]["events"],
                         "Error: Wrong number of events.")
        self.assertEqual(file["size"], testFiles[file["lfn"]]["size"],
                         "Error: Wrong file size.")
    return
def failedJobs(self, failedJobs):
    """
    _failedJobs_

    Given a list of failed jobs, sort them into Filesets and record them

    NOTE: jobs must have a non-standard task, workflow, owner and group
    attributes assigned to them.
    """
    for job in failedJobs:
        try:
            taskName = job['task']
            workflow = job['workflow']
        except KeyError, ex:
            msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
            logging.error(msg)
            raise ACDCDCSException(msg)

        coll = CouchCollection(database=self.database, url=self.url,
                               name=workflow,
                               type=CollectionTypes.DataCollection)
        owner = self.newOwner(job.get("group", "cmsdataops"),
                              job.get("owner", "cmsdataops"))
        coll.setOwner(owner)
        fileset = CouchFileset(database=self.database, url=self.url,
                               name=taskName)
        coll.addFileset(fileset)
        fileset.add(files=job['input_files'], mask=job['mask'])
def testFileset(self):
    """
    _testFileset_

    Verify that converting an ACDC fileset to a DataStructs fileset works
    correctly.
    """
    testCollection = CouchCollection(database = self.testInit.couchDbName,
                                     url = self.testInit.couchUrl,
                                     name = "Thunderstruck")
    testCollection.setOwner(self.owner)
    testFileset = CouchFileset(database = self.testInit.couchDbName,
                               url = self.testInit.couchUrl,
                               name = "TestFileset")
    testCollection.addFileset(testFileset)

    testFiles = {}
    for i in range(5):
        lfn = makeUUID()
        testFile = File(lfn = lfn, size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFiles[lfn] = testFile
        testFileset.add([testFile])

    for file in testFileset.fileset().files:
        self.assertTrue(file["lfn"] in testFiles.keys(),
                        "Error: File missing.")
        self.assertEqual(file["events"], testFiles[file["lfn"]]["events"],
                         "Error: Wrong number of events.")
        self.assertEqual(file["size"], testFiles[file["lfn"]]["size"],
                         "Error: Wrong file size.")
    return
def createFilesetFromDBS(self, collection, filesetName, dbsURL, dataset, mask=None):
    """
    _createFilesetFromDBS_

    Get info from DBS, apply mask (filter) and create a fileset
    """
    fileSet = CouchFileset(database=self.database, url=self.url, name=filesetName)
    fileSet.setCollection(collection)

    files = []
    blockLocations = {}

    dbsReader = DBSReader(dbsURL, version="DBS_2_0_9", mode="GET")
    dbsResults = dbsReader.dbs.listFiles(path=dataset,
                                         retriveList=["retrive_lumi", "retrive_run"])
    logging.info('Found %s files from DBS' % len(dbsResults))

    for dbsResult in dbsResults:
        blockName = dbsResult["Block"]["Name"]
        if not blockName in blockLocations:
            blockLocations[blockName] = dbsReader.listFileBlockLocation(blockName)

        file = File(lfn=dbsResult["LogicalFileName"], size=dbsResult["FileSize"],
                    merged=True, events=dbsResult["NumberOfEvents"],
                    locations=blockLocations[blockName])
        runs = {}
        for lumi in dbsResult["LumiList"]:
            runNumber = lumi['RunNumber']
            runString = str(runNumber)
            lumiNumber = lumi["LumiSectionNumber"]
            if runString in runs:
                runs[runString].lumis.append(lumiNumber)
            else:
                runs[runString] = Run(runNumber, lumiNumber)
        for run in runs.values():
            file.addRun(run)
        files.append(file)

    logging.info('Uploading %s files in fileset' % len(files))
    fileList = fileSet.add(files, mask)

    return fileSet, fileList
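# Hypothetical usage sketch (not from the source): filling an ACDC fileset for
# a single dataset straight out of DBS. 'dcs' stands for an instance of the
# service class defining createFilesetFromDBS() above; the collection, fileset
# name, DBS URL and dataset path are illustrative assumptions. Passing
# mask=None keeps every run/lumi returned by DBS.
fileSet, fileList = dcs.createFilesetFromDBS(collection,
                                             filesetName="/ExampleWorkflow/DataProcessing",
                                             dbsURL="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet",
                                             dataset="/Primary/Processed-v1/RECO",
                                             mask=None)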
def failedJobs(self, failedJobs, useMask=True):
    """
    _failedJobs_

    Given a list of failed jobs, sort them into Filesets and record them

    NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
    """
    # first we sort the list of dictionary by two keys: workflow then task
    failedJobs.sort(key=itemgetter('workflow'))
    failedJobs.sort(key=itemgetter('task'))

    previousWorkflow = ""
    previousTask = ""
    for job in failedJobs:
        try:
            workflow = job['workflow']
            taskName = job['task']
        except KeyError as ex:
            msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
            logging.error(msg)
            raise ACDCDCSException(msg)

        if workflow != previousWorkflow:
            coll = CouchCollection(database=self.database, url=self.url,
                                   name=workflow,
                                   type=CollectionTypes.DataCollection)
        if taskName != previousTask:
            fileset = CouchFileset(database=self.database, url=self.url, name=taskName)
        coll.addFileset(fileset)

        inputFiles = job['input_files']
        for fInfo in inputFiles:
            if int(fInfo["merged"]) == 1:  # Looks like Oracle and MySQL return diff type
                fInfo["parents"] = []
            elif fInfo.get("parents", []):
                firstParent = next(iter(fInfo["parents"]))
                if "/store/unmerged/" in firstParent:
                    # parents and input files are unmerged files - need to find merged ascendant
                    fInfo["parents"] = list(getMergedParents(fInfo["parents"]))
                elif "MCFakeFile" in firstParent:
                    fInfo["parents"] = []
                # other case, fInfo["parents"] all or merged parents
        if useMask:
            fileset.add(files=inputFiles, mask=job['mask'])
        else:
            fileset.add(files=inputFiles)

        previousWorkflow = workflow
        previousTask = taskName

    return
def run(self):
    lfn = self.lfn
    if self.v:
        print "Starting for", lfn
    now = time.mktime(time.gmtime())
    fileInfo = self.fi
    requestObject = self.ro
    fileRuns = {}
    acdcCouchUrl = self.ac
    acdcCouchDb = self.acd
    filesetName = self.fsn
    collection = self.c
    self.lumis = 0
    self.files = 0

    for run in fileInfo['runs']:
        if run in requestObject['lumis']:
            for lumi in fileInfo['runs'][run][0]:
                if lumi in requestObject['lumis'][run]:
                    if run not in fileRuns:
                        fileRuns[run] = []
                    fileRuns[run].append(lumi)
                    self.lumis += 1

    if fileRuns:
        self.files += 1
        fileset = CouchFileset(**{"url": acdcCouchUrl,
                                  "database": acdcCouchDb,
                                  "name": filesetName})
        fileset.setCollection(collection)
        acdcRuns = []
        for run in fileRuns:
            runObject = {}
            runObject['run_number'] = int(run)
            runObject['lumis'] = fileRuns[run]
            acdcRuns.append(runObject)
        acdcFile = {"lfn": lfn,
                    "first_event": 0,
                    "last_event": 0,
                    "checksums": {},
                    "size": fileInfo["size"],
                    "events": fileInfo["events"],
                    "merged": 1,
                    "parents": fileInfo["parents"],
                    "locations": fileInfo["locations"],
                    "runs": acdcRuns}
        # fileset.makeFilelist({lfn: acdcFile})

    if self.v:
        print time.mktime(time.gmtime()) - now, "[s] for makeFilelist", lfn
def createFilesetFromDBS(self, collection, filesetName, dbsURL, dataset, mask=None):
    """
    _createFilesetFromDBS_

    Get info from DBS, apply mask (filter) and create a fileset
    """
    fileSet = CouchFileset(database=self.database, url=self.url, name=filesetName)
    fileSet.setCollection(collection)

    files = []
    blockLocations = {}

    dbsReader = DBSReader(dbsURL, version="DBS_2_0_9", mode="GET")
    dbsResults = dbsReader.dbs.listFiles(path=dataset, retriveList=["retrive_lumi", "retrive_run"])
    logging.info("Found %s files from DBS" % len(dbsResults))

    for dbsResult in dbsResults:
        blockName = dbsResult["Block"]["Name"]
        if not blockName in blockLocations:
            blockLocations[blockName] = dbsReader.listFileBlockLocation(blockName)

        file = File(
            lfn=dbsResult["LogicalFileName"],
            size=dbsResult["FileSize"],
            merged=True,
            events=dbsResult["NumberOfEvents"],
            locations=blockLocations[blockName],
        )
        runs = {}
        for lumi in dbsResult["LumiList"]:
            runNumber = lumi["RunNumber"]
            runString = str(runNumber)
            lumiNumber = lumi["LumiSectionNumber"]
            if runString in runs:
                runs[runString].lumis.append(lumiNumber)
            else:
                runs[runString] = Run(runNumber, lumiNumber)
        for run in runs.values():
            file.addRun(run)
        files.append(file)

    logging.info("Uploading %s files in fileset" % len(files))
    fileList = fileSet.add(files, mask)

    return fileSet, fileList
def populate(self):
    """
    _populate_

    Load the collection and all of its filesets and files out of couch.
    """
    params = {"startkey": [self.owner.group.name, self.owner.name, self.name],
              "endkey": [self.owner.group.name, self.owner.name, self.name, {}],
              "reduce": True, "group_level": 4}
    result = self.couchdb.loadView("ACDC", "owner_coll_fileset_docs", params)

    self["filesets"] = []
    for row in result["rows"]:
        fileset = CouchFileset(database = self.database, url = self.url,
                               name = row["key"][3])
        self.addFileset(fileset)
        fileset.populate()

    return
def failedJobs(self, failedJobs, useMask=True):
    """
    _failedJobs_

    Given a list of failed jobs, sort them into Filesets and record them

    NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
    """
    for job in failedJobs:
        try:
            taskName = job['task']
            workflow = job['workflow']
        except KeyError as ex:
            msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                str(ex))
            logging.error(msg)
            raise ACDCDCSException(msg)

        coll = CouchCollection(database=self.database, url=self.url,
                               name=workflow,
                               type=CollectionTypes.DataCollection)
        fileset = CouchFileset(database=self.database, url=self.url, name=taskName)
        coll.addFileset(fileset)
        if useMask:
            fileset.add(files=job['input_files'], mask=job['mask'])
        else:
            fileset.add(files=job['input_files'])

    return
def failedJobs(self, failedJobs, useMask=True):
    """
    _failedJobs_

    Given a list of failed jobs, sort them into Filesets and record them

    NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
    """
    # first we sort the list of dictionary by two keys: workflow then task
    failedJobs.sort(key=itemgetter('workflow'))
    failedJobs.sort(key=itemgetter('task'))

    previousWorkflow = ""
    previousTask = ""
    for job in failedJobs:
        try:
            workflow = job['workflow']
            taskName = job['task']
        except KeyError as ex:
            msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                str(ex))
            logging.error(msg)
            raise ACDCDCSException(msg)

        if workflow != previousWorkflow:
            coll = CouchCollection(database=self.database,
                                   url=self.url,
                                   name=workflow,
                                   type=CollectionTypes.DataCollection)
        if taskName != previousTask:
            fileset = CouchFileset(database=self.database,
                                   url=self.url,
                                   name=taskName)
        coll.addFileset(fileset)

        inputFiles = job['input_files']
        for fInfo in inputFiles:
            if int(fInfo["merged"]) == 1:  # Looks like Oracle and MySQL return diff type
                fInfo["parents"] = []
            elif fInfo.get("parents", []):
                firstParent = next(iter(fInfo["parents"]))
                if "/store/unmerged/" in firstParent:
                    # parents and input files are unmerged files - need to find merged ascendant
                    fInfo["parents"] = list(getMergedParents(fInfo["parents"]))
                elif "MCFakeFile" in firstParent:
                    fInfo["parents"] = []
                # other case, fInfo["parents"] all or merged parents
        if useMask:
            fileset.add(files=inputFiles, mask=job['mask'])
        else:
            fileset.add(files=inputFiles)

        previousWorkflow = workflow
        previousTask = taskName

    return
def run(self):
    lfn = self.lfn
    if self.v:
        print "Starting for", lfn
    now = time.mktime(time.gmtime())
    fileInfo = self.fi
    requestObject = self.ro
    fileRuns = {}
    acdcCouchUrl = self.ac
    acdcCouchDb = self.acd
    filesetName = self.fsn
    collection = self.c
    self.lumis = 0
    self.files = 0

    for run in fileInfo['runs']:
        if run in requestObject['lumis']:
            for lumi in fileInfo['runs'][run][0]:
                if lumi in requestObject['lumis'][run]:
                    if run not in fileRuns:
                        fileRuns[run] = []
                    fileRuns[run].append(lumi)
                    self.lumis += 1

    if fileRuns:
        self.files += 1
        fileset = CouchFileset(**{
            "url": acdcCouchUrl,
            "database": acdcCouchDb,
            "name": filesetName
        })
        fileset.setCollection(collection)
        acdcRuns = []
        for run in fileRuns:
            runObject = {}
            runObject['run_number'] = int(run)
            runObject['lumis'] = fileRuns[run]
            acdcRuns.append(runObject)
        acdcFile = {
            "lfn": lfn,
            "first_event": 0,
            "last_event": 0,
            "checksums": {},
            "size": fileInfo["size"],
            "events": fileInfo["events"],
            "merged": 1,
            "parents": fileInfo["parents"],
            "locations": fileInfo["locations"],
            "runs": acdcRuns
        }
        # fileset.makeFilelist({lfn: acdcFile})

    if self.v:
        print time.mktime(time.gmtime()) - now, "[s] for makeFilelist", lfn
def failedJobs(self, failedJobs, useMask=True):
    """
    _failedJobs_

    Given a list of failed jobs, sort them into Filesets and record them

    NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
    """
    for job in failedJobs:
        try:
            taskName = job['task']
            workflow = job['workflow']
        except KeyError as ex:
            msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                str(ex))
            logging.error(msg)
            raise ACDCDCSException(msg)

        coll = CouchCollection(database=self.database, url=self.url,
                               name=workflow,
                               type=CollectionTypes.DataCollection)
        fileset = CouchFileset(database=self.database, url=self.url, name=taskName)
        coll.addFileset(fileset)

        inputFiles = job['input_files']
        for fInfo in inputFiles:
            if fInfo["merged"] and ("parents" in fInfo) and \
                    len(fInfo["parents"]) and ("/store/unmerged/" in next(iter(fInfo["parents"]))):
                # remove parent files from the acdc document if they are unmerged files
                fInfo["parents"] = []
        if useMask:
            fileset.add(files=inputFiles, mask=job['mask'])
        else:
            fileset.add(files=inputFiles)

    return
def testListCollectionsFilesets(self):
    """
    _testListCollectionsFilesets_

    Verify that collections and filesets in ACDC can be listed.
    """
    svc = CouchService(url = self.testInit.couchUrl,
                       database = self.testInit.couchDbName)

    ownerA = svc.newOwner("somegroup", "someuserA")
    ownerB = svc.newOwner("somegroup", "someuserB")

    testCollectionA = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Thunderstruck")
    testCollectionA.setOwner(ownerA)
    testCollectionB = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Struckthunder")
    testCollectionB.setOwner(ownerA)
    testCollectionC = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Thunderstruck")
    testCollectionC.setOwner(ownerB)
    testCollectionD = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Thunderstruck")
    testCollectionD.setOwner(ownerB)

    testFilesetA = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetA")
    testCollectionA.addFileset(testFilesetA)
    testFilesetB = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetB")
    testCollectionB.addFileset(testFilesetB)
    testFilesetC = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetC")
    testCollectionC.addFileset(testFilesetC)
    testFilesetD = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetD")
    testCollectionC.addFileset(testFilesetD)

    testFiles = []
    for i in range(5):
        testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFiles.append(testFile)

    testFilesetA.add(testFiles)
    testFilesetB.add(testFiles)
    testFilesetC.add(testFiles)
    testFilesetD.add(testFiles)

    goldenCollectionNames = ["Thunderstruck", "Struckthunder"]
    for collection in svc.listCollections(ownerA):
        self.assertTrue(collection["name"] in goldenCollectionNames,
                        "Error: Missing collection name.")
        goldenCollectionNames.remove(collection["name"])
    self.assertEqual(len(goldenCollectionNames), 0,
                     "Error: Missing collections.")

    goldenFilesetNames = ["TestFilesetC", "TestFilesetD"]
    for fileset in svc.listFilesets(testCollectionD):
        self.assertTrue(fileset["name"] in goldenFilesetNames,
                        "Error: Missing fileset.")
        goldenFilesetNames.remove(fileset["name"])
    self.assertEqual(len(goldenFilesetNames), 0, "Error: Missing filesets.")
    return
def populateCouchDB(self):
    """
    _populateCouchDB_

    Populate the ACDC records
    """
    svc = CouchService(url=self.testInit.couchUrl,
                       database=self.testInit.couchDbName)

    testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Struckthunder")
    testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")

    testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetA")
    testCollectionA.addFileset(testFilesetA)
    testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetB")
    testCollectionB.addFileset(testFilesetB)
    testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetC")
    testCollectionC.addFileset(testFilesetC)
    testFilesetD = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetD")
    testCollectionC.addFileset(testFilesetD)

    testFiles = []
    for i in range(5):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFiles.append(testFile)

    testFilesetA.add(testFiles)
    time.sleep(1)
    testFilesetB.add(testFiles)
    time.sleep(1)
    testFilesetC.add(testFiles)
    time.sleep(2)
    testFilesetD.add(testFiles)
def defineRequests(workload, requestInfo, acdcCouchUrl, acdcCouchDb, requestor, group,
                   dbsUrl, fakeLocation, datasetInformation = None):
    """
    _defineRequests_

    This is the ultimate function: it creates JSONs for the appropriate
    resubmission requests that can be fed into the reqmgr.py script, and it
    assembles ACDC records that can be uploaded to the database.
    """
    # First retrieve the run and block lists and load
    # the information of all datasets
    logging.debug("Original request info:\n%s", requestInfo)
    topTask = workload.getTopLevelTask()[0]
    runWhitelist = topTask.inputRunWhitelist()
    runBlacklist = topTask.inputRunBlacklist()
    blockWhitelist = topTask.inputBlockWhitelist()
    blockBlacklist = topTask.inputBlockBlacklist()
    inputDataset = workload.listInputDatasets()[0]
    outputModules = getOutputModules(workload)
    if datasetInformation is None:
        datasetInformation = {}
        logging.info("Loading DBS information for the datasets...")
        datasetInformation[inputDataset] = getFiles(inputDataset, runBlacklist, runWhitelist,
                                                    blockBlacklist, blockWhitelist, dbsUrl,
                                                    fakeLocation=fakeLocation)
        for dataset in workload.listOutputDatasets():
            datasetInformation[dataset] = getFiles(dataset, runBlacklist, runWhitelist,
                                                   blockBlacklist, blockWhitelist, dbsUrl)
        logging.info("Finished loading DBS information for the datasets...")

    # Now get the information about the datasets and tasks
    nodes, edges = buildDatasetTree(workload)
    logging.info("Dataset tree built...")
    for k, v in nodes.items():
        logging.debug("%s : %s" % (k, v))
    for k, v in edges.items():
        logging.debug("%s : %s" % (k, v))

    # Load the difference information between input and outputs
    differenceInformation = buildDifferenceMap(workload, datasetInformation)
    logging.info("Difference map processed...")
    logging.debug("%s" % str(differenceInformation))

    # Define an object that will hold the potential requests
    requests = []

    logging.info("Now defining the required requests...")
    # First generate requests for the datasets with children, that way we can
    # shoot the requests with skims in single requests
    for dataset in differenceInformation.keys():
        if dataset not in nodes:
            continue
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        taskToRecover = edges[(inputDataset, dataset)]['task']
        outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
        intersectionDiff = {}
        for childDataset in nodes[dataset]:
            childDiffLumis = differenceInformation[childDataset]
            matchAvailable = False
            for run in diffedLumis:
                if run in childDiffLumis:
                    for lumi in diffedLumis[run]:
                        if lumi in childDiffLumis[run]:
                            matchAvailable = True
                            break
            if matchAvailable:
                outputModulesToRecover.extend(edges[(dataset, childDataset)]['outMod'])
                datasetsToRecover.append(childDataset)
            for run in diffedLumis:
                if run in childDiffLumis:
                    if run not in intersectionDiff:
                        intersectionDiff[run] = set()
                        intersectionDiff[run] = diffedLumis[run] & childDiffLumis[run]
                    else:
                        intersectionDiff[run] &= diffedLumis[run] & childDiffLumis[run]
                else:
                    intersectionDiff[run] = set()
        for run in intersectionDiff:
            if not intersectionDiff[run]:
                del intersectionDiff[run]
        if not intersectionDiff:
            # Can't create request for this dataset + children
            continue
        for run in intersectionDiff:
            for childDataset in nodes[dataset]:
                childDiffLumis = differenceInformation[childDataset]
                if run in childDiffLumis:
                    childDiffLumis[run] -= intersectionDiff[run]
                    if not childDiffLumis[run]:
                        del childDiffLumis[run]
            diffedLumis[run] -= intersectionDiff[run]
            if not diffedLumis[run]:
                del diffedLumis[run]
        if not diffedLumis:
            del differenceInformation[dataset]
        for childDataset in nodes[dataset]:
            if not differenceInformation[childDataset]:
                del differenceInformation[childDataset]
        requestObject = {'task' : taskToRecover,
                         'input' : inputDataset,
                         'lumis' : intersectionDiff,
                         'outMod' : outputModulesToRecover,
                         'outputs' : datasetsToRecover}
        requests.append(requestObject)

    # Now go through all the output datasets, creating a single request for
    # each
    for dataset in differenceInformation:
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        if (inputDataset, dataset) in edges:
            taskToRecover = edges[(inputDataset, dataset)]['task']
            outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
            parentDataset = inputDataset
        else:
            for parentDataset in nodes:
                if dataset in nodes[parentDataset]:
                    taskToRecover = edges[(parentDataset, dataset)]['task']
                    outputModulesToRecover = edges[(parentDataset, dataset)]['outMod']
                    break
        requestObject = {'task' : taskToRecover,
                         'input' : parentDataset,
                         'lumis' : diffedLumis,
                         'outMod' : outputModulesToRecover,
                         'outputs' : datasetsToRecover}
        requests.append(requestObject)

    logging.info("About to upload ACDC records to: %s/%s" % (acdcCouchUrl, acdcCouchDb))
    pprint(requests)

    # With the request objects we need to build ACDC records and
    # request JSONs
    for idx, requestObject in enumerate(requests):
        collectionName = '%s_%s' % (workload.name(), str(uuid.uuid1()))
        filesetName = requestObject['task']
        collection = CouchCollection(**{"url" : acdcCouchUrl,
                                        "database" : acdcCouchDb,
                                        "name" : collectionName})
        owner = makeUser(workload.getOwner()['group'], workload.getOwner()['name'],
                         acdcCouchUrl, acdcCouchDb)
        collection.setOwner(owner)
        files = 0
        lumis = 0
        for lfn in datasetInformation[requestObject['input']]:
            fileInfo = datasetInformation[requestObject['input']][lfn]
            fileRuns = {}
            for run in fileInfo['runs']:
                if run in requestObject['lumis']:
                    for lumi in fileInfo['runs'][run][0]:
                        if lumi in requestObject['lumis'][run]:
                            if run not in fileRuns:
                                fileRuns[run] = []
                            fileRuns[run].append(lumi)
                            lumis += 1
            if fileRuns:
                files += 1
                fileset = CouchFileset(**{"url" : acdcCouchUrl,
                                          "database" : acdcCouchDb,
                                          "name" : filesetName})
                fileset.setCollection(collection)
                acdcRuns = []
                for run in fileRuns:
                    runObject = {}
                    runObject['run_number'] = int(run)
                    runObject['lumis'] = fileRuns[run]
                    acdcRuns.append(runObject)
                acdcFile = {"lfn" : lfn,
                            "first_event" : 0,
                            "last_event" : 0,
                            "checksums" : {},
                            "size" : fileInfo["size"],
                            "events" : fileInfo["events"],
                            "merged" : 1,
                            "parents" : fileInfo["parents"],
                            "locations" : fileInfo["locations"],
                            "runs" : acdcRuns}
                fileset.makeFilelist({lfn : acdcFile})

        # Put the creation parameters
        creationDict = jsonBlob["createRequest"]
        creationDict["OriginalRequestName"] = str(workload.name())
        creationDict["InitialTaskPath"] = requestObject['task']
        creationDict["CollectionName"] = collectionName
        creationDict["IgnoredOutputModules"] = list(set(outputModules) - set(requestObject['outMod']))
        creationDict["ACDCServer"] = acdcCouchUrl
        creationDict["ACDCDatabase"] = acdcCouchDb
        creationDict["RequestString"] = "recovery-%d-%s" % (idx, workload.name()[:-18])
        creationDict["Requestor"] = requestor
        creationDict["Group"] = group
        creationDict["TimePerEvent"] = requestInfo['TimePerEvent']
        creationDict["Memory"] = requestInfo['Memory']
        creationDict["SizePerEvent"] = requestInfo['SizePerEvent']
        creationDict["PrepID"] = requestInfo.get('PrepID')
        creationDict["Campaign"] = requestInfo.get('Campaign')

        # Assign parameters
        assignDict = jsonBlob["assignRequest"]
        team = requestInfo['Teams'][0]
        processingString = requestInfo['ProcessingString']
        processingVersion = requestInfo['ProcessingVersion']
        acqEra = requestInfo['AcquisitionEra']
        mergedLFNBase = requestInfo['MergedLFNBase']
        unmergedLFNBase = requestInfo['UnmergedLFNBase']
        # processingString = workload.getProcessingString()
        # processingVersion = workload.getProcessingVersion()
        # acqEra = workload.getAcquisitionEra()
        # mergedLFNBase = workload.getMergedLFNBase()
        # unmergedLFNBase = workload.getUnmergedLFNBase()
        topTask = workload.getTopLevelTask()[0]
        siteWhitelist = topTask.siteWhitelist()
        assignDict["SiteWhitelist"] = siteWhitelist
        assignDict["MergedLFNBase"] = mergedLFNBase
        assignDict["UnmergedLFNBase"] = unmergedLFNBase
        assignDict["AcquisitionEra"] = acqEra
        assignDict["Team"] = team
        try:
            int(processingVersion)
            assignDict["ProcessingVersion"] = int(processingVersion)
            if processingString is not None and processingString != 'None':
                assignDict["ProcessingString"] = processingString
        except Exception:
            tokens = processingVersion.split('-')
            assignDict["ProcessingVersion"] = int(tokens[-1][1:])
            assignDict["ProcessingString"] = ('-').join(tokens[:-1])

        fileHandle = open('%s.json' % creationDict["RequestString"], 'w')
        json.dump(jsonBlob, fileHandle)
        fileHandle.close()
        logging.info("Created JSON %s for recovery of %s" % ('%s.json' % creationDict["RequestString"],
                                                             requestObject['outputs']))
        logging.info("This will recover %d lumis in %d files" % (lumis, files))
def testCreatePopulateDrop(self):
    """
    _testCreatePopulateDrop_

    Test creating, populating and dropping a collection.
    """
    testCollectionA = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Thunderstruck")
    testCollectionB = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "StruckThunder")
    testCollectionA.setOwner(self.owner)
    testCollectionB.setOwner(self.owner)
    testCollectionA.create()
    testCollectionB.create()

    # There should be nothing in couch.  Documents are only added for
    # filesets and files.
    testFilesA = []
    for i in range(5):
        testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFilesA.append(testFile)
    testFilesB = []
    for i in range(10):
        testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFilesB.append(testFile)

    testFilesetA = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetA")
    testFilesetB = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetB")
    testFilesetC = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetC")
    testCollectionA.addFileset(testFilesetA)
    testCollectionB.addFileset(testFilesetB)
    testCollectionB.addFileset(testFilesetC)
    testFilesetA.add(testFilesA)
    testFilesetB.add(testFilesA)
    testFilesetC.add(testFilesA)
    testFilesetC.add(testFilesB)

    # Drop testCollectionA
    testCollectionA.drop()

    # Try to populate testFilesetA
    testCollectionC = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "ThunderStruck")
    testCollectionC.setOwner(self.owner)
    testCollectionC.populate()
    self.assertEqual(len(testCollectionC["filesets"]), 0,
                     "Error: There should be no filesets in this collection.")

    # Try to populate testFilesetB
    testCollectionD = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "StruckThunder")
    testCollectionD.setOwner(self.owner)
    testCollectionD.populate()

    for fileset in testCollectionD["filesets"]:
        testFiles = testFilesA
        if fileset["name"] == "TestFilesetC":
            testFiles.extend(testFilesB)

        self.assertEqual(len(testFiles), len(fileset.files.keys()),
                         "Error: Wrong number of files in fileset.")
        for testFile in testFiles:
            self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                            "Error: File is missing.")
            self.assertEqual(testFile["events"],
                             fileset.files[testFile["lfn"]]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(testFile["size"],
                             fileset.files[testFile["lfn"]]["size"],
                             "Error: Wrong file size.")
    return
def testListCollectionsFilesets(self):
    """
    _testListCollectionsFilesets_

    Verify that collections and filesets in ACDC can be listed.
    """
    svc = CouchService(url=self.testInit.couchUrl,
                       database=self.testInit.couchDbName)

    testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Struckthunder")
    testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")

    testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetA")
    testCollectionA.addFileset(testFilesetA)
    testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetB")
    testCollectionB.addFileset(testFilesetB)
    testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetC")
    testCollectionC.addFileset(testFilesetC)
    testFilesetD = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetD")
    testCollectionC.addFileset(testFilesetD)

    testFiles = []
    for i in range(5):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFiles.append(testFile)

    testFilesetA.add(testFiles)
    testFilesetB.add(testFiles)
    testFilesetC.add(testFiles)
    testFilesetD.add(testFiles)

    goldenFilesetNames = ["TestFilesetA", "TestFilesetC", "TestFilesetD"]
    for fileset in svc.listFilesets(testCollectionD):
        self.assertTrue(fileset["name"] in goldenFilesetNames,
                        "Error: Missing fileset.")
        goldenFilesetNames.remove(fileset["name"])
    self.assertEqual(len(goldenFilesetNames), 0, "Error: Missing filesets.")
    return
def testDropCount(self):
    """
    _testDropCount_

    Verify that dropping a fileset and counting the files in a fileset works
    correctly.
    """
    testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="StruckThunder")

    testFiles = []
    for i in range(5):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFiles.append(testFile)

    testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetA")
    testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetB")
    testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetC")
    testCollectionA.addFileset(testFilesetA)
    testCollectionB.addFileset(testFilesetB)
    testCollectionB.addFileset(testFilesetC)
    testFilesetA.add(testFiles)
    testFilesetB.add(testFiles)
    testFilesetC.add(testFiles)

    testFilesetC.drop()

    testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="StruckThunder")
    testCollectionC.populate()
    self.assertEqual(len(testCollectionC["filesets"]), 1,
                     "Error: There should be one fileset in this collection.")
    self.assertEqual(testCollectionC["filesets"][0].fileCount(), 5,
                     "Error: Wrong number of files in fileset.")

    testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionD.populate()
    self.assertEqual(len(testCollectionD["filesets"]), 1,
                     "Error: There should be one fileset in this collection.")
    self.assertEqual(testCollectionD["filesets"][0].fileCount(), 5,
                     "Error: Wrong number of files in fileset.")
    return
def populateCouchDB(self):
    """
    _populateCouchDB_

    Populate the ACDC records
    """
    svc = CouchService(url=self.testInit.couchUrl,
                       database=self.testInit.couchDbName)

    ownerA = svc.newOwner("somegroup", "someuserA")
    ownerB = svc.newOwner("somegroup", "someuserB")

    testCollectionA = CouchCollection(
        database=self.testInit.couchDbName, url=self.testInit.couchUrl, name="Thunderstruck"
    )
    testCollectionA.setOwner(ownerA)
    testCollectionB = CouchCollection(
        database=self.testInit.couchDbName, url=self.testInit.couchUrl, name="Struckthunder"
    )
    testCollectionB.setOwner(ownerA)
    testCollectionC = CouchCollection(
        database=self.testInit.couchDbName, url=self.testInit.couchUrl, name="Thunderstruck"
    )
    testCollectionC.setOwner(ownerB)
    testCollectionD = CouchCollection(
        database=self.testInit.couchDbName, url=self.testInit.couchUrl, name="Thunderstruck"
    )
    testCollectionD.setOwner(ownerB)

    testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetA")
    testCollectionA.addFileset(testFilesetA)
    testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetB")
    testCollectionB.addFileset(testFilesetB)
    testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetC")
    testCollectionC.addFileset(testFilesetC)
    testFilesetD = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetD")
    testCollectionC.addFileset(testFilesetD)

    testFiles = []
    for i in range(5):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFiles.append(testFile)

    testFilesetA.add(testFiles)
    time.sleep(1)
    testFilesetB.add(testFiles)
    time.sleep(1)
    testFilesetC.add(testFiles)
    time.sleep(2)
    testFilesetD.add(testFiles)
def defineRequests(workload, requestInfo, acdcCouchUrl, acdcCouchDb, requestor, group,
                   dbsUrl, datasetInformation=None):
    """
    _defineRequests_

    This is the ultimate function: it creates JSONs for the appropriate
    resubmission requests that can be fed into the reqmgr.py script, and it
    assembles ACDC records that can be uploaded to the database.
    """
    # First retrieve the run and block lists and load
    # the information of all datasets
    topTask = workload.getTopLevelTask()[0]
    runWhitelist = topTask.inputRunWhitelist()
    runBlacklist = topTask.inputRunBlacklist()
    blockWhitelist = topTask.inputBlockWhitelist()
    blockBlacklist = topTask.inputBlockBlacklist()
    inputDataset = workload.listInputDatasets()[0]
    outputModules = getOutputModules(workload)
    if datasetInformation is None:
        datasetInformation = {}
        logging.info("Loading DBS information for the datasets...")
        datasetInformation[inputDataset] = getFiles(inputDataset, runBlacklist, runWhitelist,
                                                    blockBlacklist, blockWhitelist, dbsUrl)
        for dataset in workload.listOutputDatasets():
            datasetInformation[dataset] = getFiles(dataset, runBlacklist, runWhitelist,
                                                   blockBlacklist, blockWhitelist, dbsUrl)
        logging.info("Finished loading DBS information for the datasets...")

    # Now get the information about the datasets and tasks
    nodes, edges = buildDatasetTree(workload)
    logging.info("Dataset tree built...")
    for k, v in nodes.items():
        logging.debug("%s : %s" % (k, v))
    for k, v in edges.items():
        logging.debug("%s : %s" % (k, v))

    # Load the difference information between input and outputs
    differenceInformation = buildDifferenceMap(workload, datasetInformation)
    logging.info("Difference map processed...")
    logging.debug("%s" % str(differenceInformation))

    # Define an object that will hold the potential requests
    requests = []

    logging.info("Now defining the required requests...")
    # First generate requests for the datasets with children, that way we can
    # shoot the requests with skims in single requests
    for dataset in differenceInformation.keys():
        if dataset not in nodes:
            continue
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        taskToRecover = edges[(inputDataset, dataset)]['task']
        outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
        intersectionDiff = {}
        for childDataset in nodes[dataset]:
            childDiffLumis = differenceInformation[childDataset]
            matchAvailable = False
            for run in diffedLumis:
                if run in childDiffLumis:
                    for lumi in diffedLumis[run]:
                        if lumi in childDiffLumis[run]:
                            matchAvailable = True
                            break
            if matchAvailable:
                outputModulesToRecover.extend(edges[(dataset, childDataset)]['outMod'])
                datasetsToRecover.append(childDataset)
            for run in diffedLumis:
                if run in childDiffLumis:
                    if run not in intersectionDiff:
                        intersectionDiff[run] = set()
                        intersectionDiff[run] = diffedLumis[run] & childDiffLumis[run]
                    else:
                        intersectionDiff[run] &= diffedLumis[run] & childDiffLumis[run]
                else:
                    intersectionDiff[run] = set()
        for run in intersectionDiff:
            if not intersectionDiff[run]:
                del intersectionDiff[run]
        if not intersectionDiff:
            # Can't create request for this dataset + children
            continue
        for run in intersectionDiff:
            for childDataset in nodes[dataset]:
                childDiffLumis = differenceInformation[childDataset]
                if run in childDiffLumis:
                    childDiffLumis[run] -= intersectionDiff[run]
                    if not childDiffLumis[run]:
                        del childDiffLumis[run]
            diffedLumis[run] -= intersectionDiff[run]
            if not diffedLumis[run]:
                del diffedLumis[run]
        if not diffedLumis:
            del differenceInformation[dataset]
        for childDataset in nodes[dataset]:
            if not differenceInformation[childDataset]:
                del differenceInformation[childDataset]
        requestObject = {
            'task': taskToRecover,
            'input': inputDataset,
            'lumis': intersectionDiff,
            'outMod': outputModulesToRecover,
            'outputs': datasetsToRecover
        }
        requests.append(requestObject)

    # Now go through all the output datasets, creating a single request for
    # each
    for dataset in differenceInformation:
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        if (inputDataset, dataset) in edges:
            taskToRecover = edges[(inputDataset, dataset)]['task']
            outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
            parentDataset = inputDataset
        else:
            for parentDataset in nodes:
                if dataset in nodes[parentDataset]:
                    taskToRecover = edges[(parentDataset, dataset)]['task']
                    outputModulesToRecover = edges[(parentDataset, dataset)]['outMod']
                    break
        requestObject = {
            'task': taskToRecover,
            'input': parentDataset,
            'lumis': diffedLumis,
            'outMod': outputModulesToRecover,
            'outputs': datasetsToRecover
        }
        requests.append(requestObject)

    logging.info("About to upload ACDC records to: %s/%s" % (acdcCouchUrl, acdcCouchDb))

    # With the request objects we need to build ACDC records and
    # request JSONs
    pprint(requests)
    for idx, requestObject in enumerate(requests):
        collectionName = '%s_%s' % (workload.name(), str(uuid.uuid1()))
        filesetName = requestObject['task']
        collection = CouchCollection(**{
            "url": acdcCouchUrl,
            "database": acdcCouchDb,
            "name": collectionName
        })
        owner = makeUser(workload.getOwner()['group'], workload.getOwner()['name'],
                         acdcCouchUrl, acdcCouchDb)
        collection.setOwner(owner)
        files = 0
        lumis = 0
        for lfn in datasetInformation[requestObject['input']]:
            fileInfo = datasetInformation[requestObject['input']][lfn]
            fileRuns = {}
            for run in fileInfo['runs']:
                if run in requestObject['lumis']:
                    for lumi in fileInfo['runs'][run][0]:
                        if lumi in requestObject['lumis'][run]:
                            if run not in fileRuns:
                                fileRuns[run] = []
                            fileRuns[run].append(lumi)
                            lumis += 1
            if fileRuns:
                files += 1
                fileset = CouchFileset(**{
                    "url": acdcCouchUrl,
                    "database": acdcCouchDb,
                    "name": filesetName
                })
                fileset.setCollection(collection)
                acdcRuns = []
                for run in fileRuns:
                    runObject = {}
                    runObject['run_number'] = int(run)
                    runObject['lumis'] = fileRuns[run]
                    acdcRuns.append(runObject)
                acdcFile = {
                    "lfn": lfn,
                    "first_event": 0,
                    "last_event": 0,
                    "checksums": {},
                    "size": fileInfo["size"],
                    "events": fileInfo["events"],
                    "merged": 1,
                    "parents": fileInfo["parents"],
                    "locations": fileInfo["locations"],
                    "runs": acdcRuns
                }
                fileset.makeFilelist({lfn: acdcFile})

        # Put the creation parameters
        creationDict = jsonBlob["createRequest"]
        creationDict["OriginalRequestName"] = str(workload.name())
        creationDict["InitialTaskPath"] = requestObject['task']
        creationDict["CollectionName"] = collectionName
        creationDict["IgnoredOutputModules"] = list(
            set(outputModules) - set(requestObject['outMod']))
        creationDict["ACDCServer"] = acdcCouchUrl
        creationDict["ACDCDatabase"] = acdcCouchDb
        creationDict["RequestString"] = "recovery-%d-%s" % (idx, workload.name()[:-18])
        creationDict["Requestor"] = requestor
        creationDict["Group"] = group

        # Assign parameters
        assignDict = jsonBlob["assignRequest"]
        team = requestInfo['teams'][0]
        processingString = workload.getProcessingString()
        processingVersion = workload.getProcessingVersion()
        acqEra = workload.getAcquisitionEra()
        mergedLFNBase = workload.getMergedLFNBase()
        unmergedLFNBase = workload.getUnmergedLFNBase()
        topTask = workload.getTopLevelTask()[0]
        siteWhitelist = topTask.siteWhitelist()
        assignDict["SiteWhitelist"] = siteWhitelist
        assignDict["MergedLFNBase"] = mergedLFNBase
        assignDict["UnmergedLFNBase"] = unmergedLFNBase
        assignDict["AcquisitionEra"] = acqEra
        assignDict["Team"] = team
        try:
            int(processingVersion)
            assignDict["ProcessingVersion"] = int(processingVersion)
            if processingString is not None and processingString != 'None':
                assignDict["ProcessingString"] = processingString
        except Exception:
            tokens = processingVersion.split('-')
            assignDict["ProcessingVersion"] = int(tokens[-1][1:])
            assignDict["ProcessingString"] = ('-').join(tokens[:-1])

        fileHandle = open('%s.json' % creationDict["RequestString"], 'w')
        json.dump(jsonBlob, fileHandle)
        fileHandle.close()
        logging.info("Created JSON %s for recovery of %s" % ('%s.json' % creationDict["RequestString"],
                                                             requestObject['outputs']))
        logging.info("This will recover %d lumis in %d files" % (lumis, files))
def testCreatePopulateDrop(self):
    """
    _testCreatePopulateDrop_

    Test creating, populating and dropping a collection.
    """
    testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="Thunderstruck")
    testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="StruckThunder")
    testCollectionA.setOwner(self.owner)
    testCollectionB.setOwner(self.owner)
    testCollectionA.create()
    testCollectionB.create()

    # There should be nothing in couch.  Documents are only added for
    # filesets and files.
    testFilesA = []
    for i in range(5):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFilesA.append(testFile)
    testFilesB = []
    for i in range(10):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFilesB.append(testFile)

    testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetA")
    testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetB")
    testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl, name="TestFilesetC")
    testCollectionA.addFileset(testFilesetA)
    testCollectionB.addFileset(testFilesetB)
    testCollectionB.addFileset(testFilesetC)
    testFilesetA.add(testFilesA)
    testFilesetB.add(testFilesA)
    testFilesetC.add(testFilesA)
    testFilesetC.add(testFilesB)

    # Drop testCollectionA
    testCollectionA.drop()

    # Try to populate testFilesetA
    testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="ThunderStruck")
    testCollectionC.setOwner(self.owner)
    testCollectionC.populate()
    self.assertEqual(len(testCollectionC["filesets"]), 0,
                     "Error: There should be no filesets in this collection.")

    # Try to populate testFilesetB
    testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl, name="StruckThunder")
    testCollectionD.setOwner(self.owner)
    testCollectionD.populate()

    for fileset in testCollectionD["filesets"]:
        testFiles = testFilesA
        if fileset["name"] == "TestFilesetC":
            testFiles.extend(testFilesB)

        self.assertEqual(len(testFiles), len(fileset.files.keys()),
                         "Error: Wrong number of files in fileset.")
        for testFile in testFiles:
            self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                            "Error: File is missing.")
            self.assertEqual(testFile["events"],
                             fileset.files[testFile["lfn"]]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(testFile["size"],
                             fileset.files[testFile["lfn"]]["size"],
                             "Error: Wrong file size.")
    return
def testDropCount(self):
    """
    _testDropCount_

    Verify that dropping a fileset and counting the files in a fileset works
    correctly.
    """
    testCollectionA = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Thunderstruck")
    testCollectionB = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "StruckThunder")
    testCollectionA.setOwner(self.owner)
    testCollectionB.setOwner(self.owner)

    testFiles = []
    for i in range(5):
        testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                        events = random.randint(1024, 4096))
        testFiles.append(testFile)

    testFilesetA = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetA")
    testFilesetB = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetB")
    testFilesetC = CouchFileset(database = self.testInit.couchDbName,
                                url = self.testInit.couchUrl,
                                name = "TestFilesetC")
    testCollectionA.addFileset(testFilesetA)
    testCollectionB.addFileset(testFilesetB)
    testCollectionB.addFileset(testFilesetC)
    testFilesetA.add(testFiles)
    testFilesetB.add(testFiles)
    testFilesetC.add(testFiles)

    testFilesetC.drop()

    testCollectionC = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "StruckThunder")
    testCollectionC.setOwner(self.owner)
    testCollectionC.populate()
    self.assertEqual(len(testCollectionC["filesets"]), 1,
                     "Error: There should be one fileset in this collection.")
    self.assertEqual(testCollectionC["filesets"][0].fileCount(), 5,
                     "Error: Wrong number of files in fileset.")

    testCollectionD = CouchCollection(database = self.testInit.couchDbName,
                                      url = self.testInit.couchUrl,
                                      name = "Thunderstruck")
    testCollectionD.setOwner(self.owner)
    testCollectionD.populate()
    self.assertEqual(len(testCollectionD["filesets"]), 1,
                     "Error: There should be one fileset in this collection.")
    self.assertEqual(testCollectionD["filesets"][0].fileCount(), 5,
                     "Error: Wrong number of files in fileset.")
    return