Ejemplo n.º 1
0
    def setupACDCDatabase(self, collectionName, taskPath,
                          user, group):
        """
        _setupACDCDatabase_

        Fill an ACDC database with bogus records tied to the given
        collection name, task path, user and group.
        """
        service = CouchService(url=self.testInit.couchUrl,
                               database="%s_acdc" % self.couchDBName)
        ownerObj = service.newOwner(group, user)

        collection = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl,
                                     name=collectionName)
        collection.setOwner(ownerObj)

        fileset = CouchFileset(database=self.testInit.couchDbName,
                               url=self.testInit.couchUrl,
                               name=taskPath)
        collection.addFileset(fileset)

        # Five bogus files with random sizes and event counts
        bogusFiles = [File(lfn=makeUUID(),
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
                      for _ in range(5)]
        fileset.add(bogusFiles)
Ejemplo n.º 2
0
    def testFileset(self):
        """
        _testFileset_

        Verify that converting an ACDC fileset to a DataStructs fileset works
        correctly.
        """
        collection = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl,
                                     name="Thunderstruck")
        collection.setOwner(self.owner)
        fileset = CouchFileset(database=self.testInit.couchDbName,
                               url=self.testInit.couchUrl,
                               name="TestFileset")
        collection.addFileset(fileset)

        expected = {}
        for _ in range(5):
            lfn = makeUUID()
            newFile = File(lfn=lfn, size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
            expected[lfn] = newFile
            fileset.add([newFile])

        # Every converted file must match the one we stored
        for converted in fileset.fileset().files:
            lfn = converted["lfn"]
            self.assertTrue(lfn in expected, "Error: File missing.")
            self.assertEqual(converted["events"], expected[lfn]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(converted["size"], expected[lfn]["size"],
                             "Error: Wrong file size.")
        return
Ejemplo n.º 3
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        NOTE: jobs must have non-standard task and workflow attributes
        assigned to them.
        """
        for job in failedJobs:
            try:
                taskName, workflow = job['task'], job['workflow']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            collection = CouchCollection(database=self.database,
                                         url=self.url,
                                         name=workflow,
                                         type=CollectionTypes.DataCollection)
            taskFileset = CouchFileset(database=self.database,
                                       url=self.url,
                                       name=taskName)
            collection.addFileset(taskFileset)

            # Attach the input files, optionally restricted by the job mask
            kwargs = {'files': job['input_files']}
            if useMask:
                kwargs['mask'] = job['mask']
            taskFileset.add(**kwargs)

        return
Ejemplo n.º 4
0
    def testFileset(self):
        """
        _testFileset_

        Verify that converting an ACDC fileset to a DataStructs fileset works
        correctly.
        """
        collection = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl,
                                     name="Thunderstruck")
        fileset = CouchFileset(database=self.testInit.couchDbName,
                               url=self.testInit.couchUrl,
                               name="TestFileset")
        collection.addFileset(fileset)

        knownFiles = {}
        for _ in range(5):
            lfn = makeUUID()
            knownFiles[lfn] = File(lfn=lfn,
                                   size=random.randint(1024, 4096),
                                   events=random.randint(1024, 4096))
            fileset.add([knownFiles[lfn]])

        # The converted fileset must contain exactly the files we added
        for converted in fileset.fileset().files:
            lfn = converted["lfn"]
            self.assertTrue(lfn in knownFiles, "Error: File missing.")
            self.assertEqual(converted["events"], knownFiles[lfn]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(converted["size"], knownFiles[lfn]["size"],
                             "Error: Wrong file size.")
        return
Ejemplo n.º 5
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        NOTE: jobs must have non-standard task, workflow, owner and group
        attributes assigned to them.
        """
        for job in failedJobs:
            try:
                taskName = job['task']
                workflow = job['workflow']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            collection = CouchCollection(database=self.database, url=self.url,
                                         name=workflow,
                                         type=CollectionTypes.DataCollection)
            # Fall back to the cmsdataops group/owner when not set on the job
            jobOwner = self.newOwner(job.get("group", "cmsdataops"),
                                     job.get("owner", "cmsdataops"))
            collection.setOwner(jobOwner)
            taskFileset = CouchFileset(database=self.database, url=self.url,
                                       name=taskName)
            collection.addFileset(taskFileset)
            if useMask:
                taskFileset.add(files=job['input_files'], mask=job['mask'])
            else:
                taskFileset.add(files=job['input_files'])

        return
Ejemplo n.º 6
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them

        NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
        """
        # Sort once with a composite key so the list is grouped by workflow,
        # then task. The previous two consecutive stable sorts (workflow,
        # then task) left the list ordered primarily by the LAST key applied
        # (task), which broke the collection/fileset reuse logic below.
        failedJobs.sort(key=itemgetter('workflow', 'task'))

        previousWorkflow = ""
        previousTask = ""
        for job in failedJobs:
            try:
                workflow = job['workflow']
                taskName = job['task']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            if workflow != previousWorkflow:
                coll = CouchCollection(database=self.database,
                                       url=self.url,
                                       name=workflow,
                                       type=CollectionTypes.DataCollection)
            # Recreate the fileset when the workflow changes too; otherwise a
            # fileset already attached to the previous collection would be
            # added to the new one.
            if workflow != previousWorkflow or taskName != previousTask:
                fileset = CouchFileset(database=self.database,
                                       url=self.url,
                                       name=taskName)
            coll.addFileset(fileset)
            inputFiles = job['input_files']
            for fInfo in inputFiles:
                # Oracle and MySQL return different types for "merged"
                if int(fInfo["merged"]) == 1:
                    fInfo["parents"] = []
                elif fInfo.get("parents", []):
                    firstParent = next(iter(fInfo["parents"]))
                    if "/store/unmerged/" in firstParent:
                        # parents and input files are unmerged files - need to find merged ascendant
                        fInfo["parents"] = list(
                            getMergedParents(fInfo["parents"]))
                    elif "MCFakeFile" in firstParent:
                        fInfo["parents"] = []
                    # other case, fInfo["parents"] all or merged parents
            if useMask:
                fileset.add(files=inputFiles, mask=job['mask'])
            else:
                fileset.add(files=inputFiles)

            previousWorkflow = workflow
            previousTask = taskName

        return
    def getDataCollection(self, collName):
        """
        _getDataCollection_

        Retrieve and populate the data collection named *collName*.
        """
        collection = CouchCollection(name=collName,
                                     database=self.database,
                                     url=self.url)
        collection.populate()
        return collection
Ejemplo n.º 8
0
    def getDataCollection(self, collName):
        """
        _getDataCollection_

        Look up a data collection by its name and load its contents.
        """
        dataColl = CouchCollection(name=collName, database=self.database,
                                   url=self.url)
        dataColl.populate()
        return dataColl
Ejemplo n.º 9
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them

        NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
        """
        # Sort once with a composite key so the list is grouped by workflow,
        # then task. Two consecutive stable sorts (workflow, then task) would
        # leave the list ordered primarily by task, breaking the
        # collection/fileset reuse logic below.
        failedJobs.sort(key=itemgetter('workflow', 'task'))

        previousWorkflow = ""
        previousTask = ""
        for job in failedJobs:
            try:
                workflow = job['workflow']
                taskName = job['task']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            if workflow != previousWorkflow:
                coll = CouchCollection(database=self.database, url=self.url,
                                       name=workflow,
                                       type=CollectionTypes.DataCollection)
            # Recreate the fileset when the workflow changes too; otherwise a
            # fileset already attached to the previous collection would be
            # added to the new one.
            if workflow != previousWorkflow or taskName != previousTask:
                fileset = CouchFileset(database=self.database, url=self.url,
                                       name=taskName)
            coll.addFileset(fileset)
            inputFiles = job['input_files']
            for fInfo in inputFiles:
                if int(fInfo["merged"]) == 1:  # Looks like Oracle and MySQL return diff type
                    fInfo["parents"] = []
                elif fInfo.get("parents", []):
                    firstParent = next(iter(fInfo["parents"]))
                    if "/store/unmerged/" in firstParent:
                        # parents and input files are unmerged files - need to find merged ascendant
                        fInfo["parents"] = list(getMergedParents(fInfo["parents"]))
                    elif "MCFakeFile" in firstParent:
                        fInfo["parents"] = []
                    # other case, fInfo["parents"] all or merged parents
            if useMask:
                fileset.add(files=inputFiles, mask=job['mask'])
            else:
                fileset.add(files=inputFiles)

            previousWorkflow = workflow
            previousTask = taskName

        return
Ejemplo n.º 10
0
    def getDataCollection(self, collName, user="******",
                          group="cmsdataops"):
        """
        _getDataCollection_

        Fetch the named data collection, attaching the given user/group
        as its owner before populating it.
        """
        collection = CouchCollection(name=collName, database=self.database,
                                     url=self.url)
        collection.owner = self.newOwner(group, user)
        collection.populate()
        return collection
Ejemplo n.º 11
0
    def getDataCollection(self, collName, user="******",
                          group="cmsdataops"):
        """
        _getDataCollection_

        Get a data collection by name, owned by the given user and group.
        """
        dataColl = CouchCollection(name=collName,
                                   database=self.database,
                                   url=self.url)
        dataColl.owner = self.newOwner(group, user)
        dataColl.populate()
        return dataColl
Ejemplo n.º 12
0
    def listCollections(self, owner):
        """
        _listCollections_

        Yield the collections belonging to *owner*.
        """
        viewParams = {"startkey": [owner.group.name, owner.name],
                      "endkey": [owner.group.name, owner.name, {}],
                      "reduce": True,
                      "group_level": 3}

        result = self.couchdb.loadView("ACDC", "owner_coll_fileset_docs",
                                       viewParams)

        for row in result["rows"]:
            collection = CouchCollection(name=row["key"][2],
                                         database=self.database,
                                         url=self.url)
            collection.setOwner(owner)
            collection.populate()
            yield collection
Ejemplo n.º 13
0
    def setupACDCDatabase(self, collectionName, taskPath,
                          user, group):
        """
        _setupACDCDatabase_

        Populate an ACDC database with bogus records
        associated to certain collection name, user and task path.
        """
        service = CouchService(url=self.testInit.couchUrl,
                               database="%s_acdc" % self.couchDBName)
        testCollection = CouchCollection(database=self.testInit.couchDbName,
                                         url=self.testInit.couchUrl,
                                         name=collectionName)
        testCollection.setOwner(service.newOwner(group, user))
        testFileset = CouchFileset(database=self.testInit.couchDbName,
                                   url=self.testInit.couchUrl,
                                   name=taskPath)
        testCollection.addFileset(testFileset)

        # Attach five files with random sizes and event counts
        testFileset.add([File(lfn=makeUUID(),
                              size=random.randint(1024, 4096),
                              events=random.randint(1024, 4096))
                         for _ in range(5)])
Ejemplo n.º 14
0
    def testListFiles(self):
        """
        _testListFiles_

        Verify that the files iterator works correctly.
        """
        collection = CouchCollection(database=self.testInit.couchDbName,
                                     url=self.testInit.couchUrl,
                                     name="Thunderstruck")
        collection.setOwner(self.owner)
        fileset = CouchFileset(database=self.testInit.couchDbName,
                               url=self.testInit.couchUrl,
                               name="TestFileset")
        collection.addFileset(fileset)

        addedFiles = {}
        for _ in range(5):
            lfn = makeUUID()
            addedFiles[lfn] = File(lfn=lfn,
                                   size=random.randint(1024, 4096),
                                   events=random.randint(1024, 4096))
            fileset.add([addedFiles[lfn]])

        # The iterator must report exactly the files we added
        for listed in fileset.listFiles():
            lfn = listed["lfn"]
            self.assertTrue(lfn in addedFiles, "Error: File missing.")
            self.assertEqual(listed["events"], addedFiles[lfn]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(listed["size"], addedFiles[lfn]["size"],
                             "Error: Wrong file size.")
        return
Ejemplo n.º 15
0
    def failedJobs(self, failedJobs):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        NOTE: jobs must have non-standard task, workflow, owner and group
        attributes assigned to them.

        :param failedJobs: list of failed job dictionaries
        :raises ACDCDCSException: if a job lacks the task or workflow keys
        """
        for job in failedJobs:
            try:
                taskName = job['task']
                workflow = job['workflow']
            # Python-3 compatible syntax (was "except KeyError, ex"),
            # consistent with the other methods in this service.
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            coll = CouchCollection(database=self.database,
                                   url=self.url,
                                   name=workflow,
                                   type=CollectionTypes.DataCollection)
            # Default to the cmsdataops owner/group when absent from the job
            owner = self.newOwner(job.get("group", "cmsdataops"),
                                  job.get("owner", "cmsdataops"))
            coll.setOwner(owner)
            fileset = CouchFileset(database=self.database,
                                   url=self.url,
                                   name=taskName)
            coll.addFileset(fileset)
            fileset.add(files=job['input_files'], mask=job['mask'])
Ejemplo n.º 16
0
    def createCollection(self, collectionName, userName, groupName):
        """
        _createCollection_

        Create an empty AnalysisCollection.

        :param collectionName: name for the new collection
        :param userName: owner name; must not be None
        :param groupName: owner group; must not be None
        :raises RuntimeError: if userName or groupName is None
        :returns: the created collection
        """
        # Identity comparison with None per PEP 8 (was "== None")
        if userName is None:
            msg = "WMSpec does not contain an owner.name parameter"
            raise RuntimeError(msg)
        if groupName is None:
            msg = "WMSpec does not contain an owner.group parameter"
            raise RuntimeError(msg)

        user = self.newOwner(groupName, userName)
        collection = CouchCollection(
            name=collectionName, type=CollectionTypes.AnalysisCollection, url=self.url, database=self.database
        )
        collection.setOwner(user)
        collection.create()

        return collection
Ejemplo n.º 17
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        NOTE: jobs must have non-standard task and workflow attributes
        assigned to them.
        """
        for job in failedJobs:
            try:
                taskName = job['task']
                workflow = job['workflow']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            collection = CouchCollection(database=self.database,
                                         url=self.url,
                                         name=workflow,
                                         type=CollectionTypes.DataCollection)
            taskFileset = CouchFileset(database=self.database,
                                       url=self.url,
                                       name=taskName)
            collection.addFileset(taskFileset)

            inputFiles = job['input_files']
            for fInfo in inputFiles:
                # remove parent files from the ACDC document when they are
                # unmerged files
                parents = fInfo.get("parents")
                if fInfo["merged"] and parents and \
                        "/store/unmerged/" in next(iter(parents)):
                    fInfo["parents"] = []
            if useMask:
                taskFileset.add(files=inputFiles, mask=job['mask'])
            else:
                taskFileset.add(files=inputFiles)

        return
Ejemplo n.º 18
0
    def listCollections(self, owner):
        """
        _listCollections_

        Generator over the collections belonging to *owner*.
        """
        params = {
            "startkey": [owner.group.name, owner.name],
            "endkey": [owner.group.name, owner.name, {}],
            "reduce": True,
            "group_level": 3
        }
        rows = self.couchdb.loadView("ACDC", "owner_coll_fileset_docs",
                                     params)["rows"]

        for row in rows:
            collection = CouchCollection(name=row["key"][2],
                                         database=self.database,
                                         url=self.url)
            collection.setOwner(owner)
            collection.populate()
            yield collection
Ejemplo n.º 19
0
    def createCollection(self, collectionName, userName, groupName):
        """
        _createCollection_

        Create an empty AnalysisCollection.

        :param collectionName: name for the new collection
        :param userName: owner name; must not be None
        :param groupName: owner group; must not be None
        :raises RuntimeError: if userName or groupName is None
        :returns: the created collection
        """
        # Use identity comparison with None (PEP 8), not "== None"
        if userName is None:
            msg = "WMSpec does not contain an owner.name parameter"
            raise RuntimeError(msg)
        if groupName is None:
            msg = "WMSpec does not contain an owner.group parameter"
            raise RuntimeError(msg)

        user = self.newOwner(groupName, userName)
        collection = CouchCollection(name=collectionName,
                                     type=CollectionTypes.AnalysisCollection,
                                     url=self.url,
                                     database=self.database)
        collection.setOwner(user)
        collection.create()

        return collection
Ejemplo n.º 20
0
def defineRequests(workload,
                   requestInfo,
                   acdcCouchUrl,
                   acdcCouchDb,
                   requestor,
                   group,
                   dbsUrl,
                   fakeLocation,
                   datasetInformation=None):
    """
    _defineRequests_

    This is the ultimate function,
    it will create JSONs for the appropiate resubmission requests
    that can be feed into the reqmgr.py script and it will assemble
    acdc records that can be uploaded to the database.
    """
    main_now = time.mktime(time.gmtime())
    # First retrieve the run and block lists and load
    # the information of all datasets
    logging.debug("Original request info:\n%s", requestInfo)
    topTask = workload.getTopLevelTask()[0]
    runWhitelist = topTask.inputRunWhitelist()
    runBlacklist = topTask.inputRunBlacklist()
    blockWhitelist = topTask.inputBlockWhitelist()
    blockBlacklist = topTask.inputBlockBlacklist()
    inputDataset = workload.listInputDatasets()[0]
    outputModules = getOutputModules(workload)
    if datasetInformation is None:
        datasetInformation = {}
        logging.info("Loading DBS information for the datasets...")
        now = time.mktime(time.gmtime())
        datasetInformation[inputDataset] = getFiles(inputDataset,
                                                    runBlacklist,
                                                    runWhitelist,
                                                    blockBlacklist,
                                                    blockWhitelist,
                                                    dbsUrl,
                                                    fakeLocation=fakeLocation)
        print time.mktime(
            time.gmtime()) - now, "[s] for a call to getFiles", inputDataset
        for dataset in workload.listOutputDatasets():
            now = time.mktime(time.gmtime())
            datasetInformation[dataset] = getFiles(dataset, runBlacklist,
                                                   runWhitelist,
                                                   blockBlacklist,
                                                   blockWhitelist, dbsUrl)
            print time.mktime(
                time.gmtime()) - now, "[s] for a call to getFiles", dataset
        logging.info("Finished loading DBS information for the datasets...")

    # Now get the information about the datasets and tasks
    nodes, edges = buildDatasetTree(workload)
    logging.info("Dataset tree built...")
    for k, v in nodes.items():
        logging.debug("%s : %s" % (k, v))
    for k, v in edges.items():
        logging.debug("%s : %s" % (k, v))
    # Load the difference information between input and outputs
    differenceInformation = buildDifferenceMap(workload, datasetInformation)
    logging.info("Difference map processed...")
    logging.debug("%s" % str(differenceInformation))
    # Define an object that will hold the potential requests
    requests = []
    logging.info("Now definining the required requests...")
    # First generate requests for the datasets with children, that way we can
    # shoot the requests with skims in single requests
    for dataset in differenceInformation.keys():
        if dataset not in nodes:
            continue
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        taskToRecover = edges[(inputDataset, dataset)]['task']
        outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
        intersectionDiff = {}
        for childDataset in nodes[dataset]:
            childDiffLumis = differenceInformation[childDataset]
            matchAvailable = False
            for run in diffedLumis:
                if run in childDiffLumis:
                    for lumi in diffedLumis[run]:
                        if lumi in childDiffLumis[run]:
                            matchAvailable = True
                            break
            if matchAvailable:
                outputModulesToRecover.extend(edges[(dataset,
                                                     childDataset)]['outMod'])
                datasetsToRecover.append(childDataset)
            for run in diffedLumis:
                if run in childDiffLumis:
                    if run not in intersectionDiff:
                        intersectionDiff[run] = set()
                        intersectionDiff[
                            run] = diffedLumis[run] & childDiffLumis[run]
                    else:
                        intersectionDiff[
                            run] &= diffedLumis[run] & childDiffLumis[run]
                else:
                    intersectionDiff[run] = set()
        for run in intersectionDiff:
            if not intersectionDiff[run]:
                del intersectionDiff[run]
        if not intersectionDiff:
            # Can't create request for this dataset + children
            continue
        for run in intersectionDiff:
            for childDataset in nodes[dataset]:
                childDiffLumis = differenceInformation[childDataset]
                if run in childDiffLumis:
                    childDiffLumis[run] -= intersectionDiff[run]
                    if not childDiffLumis[run]:
                        del childDiffLumis[run]
            diffedLumis[run] -= intersectionDiff[run]
            if not diffedLumis[run]:
                del diffedLumis[run]
        if not diffedLumis:
            del differenceInformation[dataset]
        for childDataset in nodes[dataset]:
            if not differenceInformation[childDataset]:
                del differenceInformation[childDataset]

        requestObject = {
            'task': taskToRecover,
            'input': inputDataset,
            'lumis': intersectionDiff,
            'outMod': outputModulesToRecover,
            'outputs': datasetsToRecover
        }
        requests.append(requestObject)
    # Now go through all the output datasets, creating a single request for
    # each
    for dataset in differenceInformation:
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        if (inputDataset, dataset) in edges:
            taskToRecover = edges[(inputDataset, dataset)]['task']
            outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
            parentDataset = inputDataset
        else:
            for parentDataset in nodes:
                if dataset in nodes[parentDataset]:
                    taskToRecover = edges[(parentDataset, dataset)]['task']
                    outputModulesToRecover = edges[(parentDataset,
                                                    dataset)]['outMod']
                    break
        requestObject = {
            'task': taskToRecover,
            'input': parentDataset,
            'lumis': diffedLumis,
            'outMod': outputModulesToRecover,
            'outputs': datasetsToRecover
        }
        requests.append(requestObject)

    logging.info("About to upload ACDC records to: %s/%s" %
                 (acdcCouchUrl, acdcCouchDb))

    ## this printout is making a lot of crap
    ##pprint(requests)

    # With the request objects we need to build ACDC records and
    # request JSONs

    class CouchBuster(threading.Thread):
        def __init__(self, **args):
            threading.Thread.__init__(self)
            import copy
            for k, v in args.items():
                #if not k in ['c']:
                #    setattr(self,k,copy.deepcopy(v))
                #else:
                #    setattr(self,k,v)
                setattr(self, k, v)

        def run(self):

            lfn = self.lfn
            if self.v:
                print "Starting for", lfn
            now = time.mktime(time.gmtime())
            fileInfo = self.fi
            requestObject = self.ro
            fileRuns = {}
            acdcCouchUrl = self.ac
            acdcCouchDb = self.acd
            filesetName = self.fsn
            collection = self.c
            self.lumis = 0
            self.files = 0
            for run in fileInfo['runs']:
                if run in requestObject['lumis']:
                    for lumi in fileInfo['runs'][run][0]:
                        if lumi in requestObject['lumis'][run]:
                            if run not in fileRuns:
                                fileRuns[run] = []
                            fileRuns[run].append(lumi)
                            self.lumis += 1
            if fileRuns:
                self.files += 1
                fileset = CouchFileset(
                    **{
                        "url": acdcCouchUrl,
                        "database": acdcCouchDb,
                        "name": filesetName
                    })
                fileset.setCollection(collection)
                acdcRuns = []
                for run in fileRuns:
                    runObject = {}
                    runObject['run_number'] = int(run)
                    runObject['lumis'] = fileRuns[run]
                    acdcRuns.append(runObject)
                acdcFile = {
                    "lfn": lfn,
                    "first_event": 0,
                    "last_event": 0,
                    "checksums": {},
                    "size": fileInfo["size"],
                    "events": fileInfo["events"],
                    "merged": 1,
                    "parents": fileInfo["parents"],
                    "locations": fileInfo["locations"],
                    "runs": acdcRuns
                }
                #
                fileset.makeFilelist({lfn: acdcFile})
            if self.v:
                print time.mktime(
                    time.gmtime()) - now, "[s] for makeFilelist", lfn

    # Build one ACDC collection + request JSON per recovery request.
    # NOTE(review): this loop sits inside a function whose signature is above
    # this excerpt; names such as requests, workload, jsonBlob, acdcCouchUrl,
    # acdcCouchDb, requestor, group, outputModules, requestInfo,
    # datasetInformation and main_now come from that enclosing scope.
    for idx, requestObject in enumerate(requests):
        # Timestamp used only for the elapsed-seconds progress prints below.
        now = time.mktime(time.gmtime())
        # Unique collection name: workload name plus a time-based UUID.
        collectionName = '%s_%s' % (workload.name(), str(uuid.uuid1()))
        print time.mktime(time.gmtime(
        )) - now, "[s]", "starting", idx, "in collection name", collectionName
        filesetName = requestObject['task']
        collection = CouchCollection(**{
            "url": acdcCouchUrl,
            "database": acdcCouchDb,
            "name": collectionName
        })
        print time.mktime(time.gmtime()) - now, "[s]", "collection created"
        files = 0
        lumis = 0
        cthreads = []
        # One CouchBuster worker per input LFN; each uploads the ACDC
        # records for its file (v=False keeps the workers quiet).
        for lfn in datasetInformation[requestObject['input']]:
            cthreads.append(
                CouchBuster(lfn=lfn,
                            fi=datasetInformation[requestObject['input']][lfn],
                            ro=requestObject,
                            ac=acdcCouchUrl,
                            acd=acdcCouchDb,
                            fsn=filesetName,
                            c=collection,
                            v=False))

        print len(cthreads), "CouchBuster created"
        # Run the workers through ThreadBuster -- presumably 40 concurrent
        # threads with a 2s poll interval; confirm against ThreadBuster's
        # signature.
        cthreads = ThreadBuster(cthreads, 40, 2., verbose=False)

        # Aggregate per-worker counters for the summary log at the end.
        for t in cthreads:
            files += t.files
            lumis += t.lumis

        print time.mktime(time.gmtime()) - now, "[s]", "ending loop"
        # Put the creation parameters
        creationDict = jsonBlob["createRequest"]
        creationDict["OriginalRequestName"] = str(workload.name())
        creationDict["InitialTaskPath"] = requestObject['task']
        creationDict["CollectionName"] = collectionName
        # Skip every output module that does not need recovery.
        creationDict["IgnoredOutputModules"] = list(
            set(outputModules) - set(requestObject['outMod']))
        creationDict["ACDCServer"] = acdcCouchUrl
        creationDict["ACDCDatabase"] = acdcCouchDb
        #creationDict["RequestString"] = "r-%d-%s" % (idx, workload.name())[:50]
        creationDict["RequestString"] = "r-%d-%s" % (
            idx, requestInfo['RequestString'])
        creationDict["Requestor"] = requestor
        # Double the original priority (default 60000), capped at 500000.
        creationDict["RequestPriority"] = min(
            500000,
            requestInfo.get('RequestPriority', 60000) * 2)
        creationDict["Group"] = group
        creationDict["TimePerEvent"] = requestInfo['TimePerEvent']
        creationDict["Memory"] = requestInfo['Memory']
        creationDict["SizePerEvent"] = requestInfo['SizePerEvent']
        creationDict["PrepID"] = requestInfo.get('PrepID')
        creationDict["Campaign"] = requestInfo.get('Campaign')

        # Assign parameters
        assignDict = jsonBlob["assignRequest"]
        team = requestInfo['Team']
        processingString = requestInfo['ProcessingString']
        processingVersion = requestInfo['ProcessingVersion']
        acqEra = requestInfo['AcquisitionEra']
        mergedLFNBase = requestInfo['MergedLFNBase']
        unmergedLFNBase = requestInfo['UnmergedLFNBase']
        # processingString = workload.getProcessingString()
        # processingVersion = workload.getProcessingVersion()
        # acqEra = workload.getAcquisitionEra()
        # mergedLFNBase = workload.getMergedLFNBase()
        # unmergedLFNBase = workload.getUnmergedLFNBase()
        topTask = workload.getTopLevelTask()[0]
        siteWhitelist = topTask.siteWhitelist()
        assignDict["SiteWhitelist"] = siteWhitelist
        assignDict["MergedLFNBase"] = mergedLFNBase
        assignDict["UnmergedLFNBase"] = unmergedLFNBase
        assignDict["AcquisitionEra"] = acqEra
        assignDict["Team"] = team
        # ProcessingVersion is either a bare integer or a string like
        # "<string>-v<N>"; in the latter case split the two pieces apart.
        try:
            int(processingVersion)
            assignDict["ProcessingVersion"] = int(processingVersion)
            if processingString is not None and processingString != 'None':
                assignDict["ProcessingString"] = processingString
        except Exception:
            tokens = processingVersion.split('-')
            assignDict["ProcessingVersion"] = int(tokens[-1][1:])
            assignDict["ProcessingString"] = ('-').join(tokens[:-1])
        print time.mktime(time.gmtime()) - now, "[s]", "data prepared"
        # Dump the filled-in template; one JSON file per recovery request.
        fileHandle = open('%s.json' % creationDict["RequestString"], 'w')
        json.dump(jsonBlob, fileHandle)
        fileHandle.close()
        print time.mktime(time.gmtime()) - now, "[s]", "json made"
        logging.info("Created JSON %s for recovery of %s" %
                     ('%s.json' % creationDict["RequestString"],
                      requestObject['outputs']))
        logging.info("This will recover %d lumis in %d files" % (lumis, files))
    print time.mktime(time.gmtime()) - main_now, "[s]", "to complete"
Ejemplo n.º 21
0
    def testL_CascadeCloseOutAnnnouncement(self):
        """
        _testL_CascadeCloseOutAnnouncement_

        Test the cascade closeout REST call, also
        check that when announced a request deletes all ACDC records in the system.
        """
        # NOTE(review): the method name carries a typo ("Annnouncement");
        # left as-is because renaming would change the test identifier.
        userName     = '******'
        groupName    = 'Li'
        teamName     = 'Tang'
        schema       = utils.getAndSetupSchema(self,
                                               userName = userName,
                                               groupName = groupName,
                                               teamName = teamName)
        configID = self.createConfig()
        schema["ConfigCacheID"] = configID
        schema["CouchDBName"] = self.couchDBName
        schema["CouchURL"]    = os.environ.get("COUCHURL")

        # Create the root request and attach ACDC records to it.
        result = self.jsonSender.put("request", schema)[0]
        originalRequest = result['RequestName']
        self.setupACDCDatabase(originalRequest, "/%s/DataProcessing" % originalRequest,
                               result['Requestor'], result['Group'])
        # Build a tree of resubmission requests: every node spawns nReq
        # children down to the given depth, each with its own ACDC records.
        depth = 2
        nReq = 3
        requests = [originalRequest]
        def createChildrenRequest(parentRequest, i, nReq):
            # Recursively create one resubmission of parentRequest and,
            # while i > 0, nReq further resubmissions below it.
            createdRequests = []
            resubSchema = utils.getResubmissionSchema(parentRequest, "/%s/DataProcessing" % parentRequest,
                                                      groupName, userName)
            result = self.jsonSender.put("request", resubSchema)[0]
            requestName = result['RequestName']
            self.setupACDCDatabase(requestName, "/%s/DataProcessing" % requestName, result['Requestor'], result['Group'])
            createdRequests.append(requestName)
            if i:
                for _ in range(nReq):
                    createdRequests.extend(createChildrenRequest(requestName, i - 1, nReq))
            return createdRequests
        requests.extend(createChildrenRequest(originalRequest, depth, nReq))
        # Walk every request through the state machine up to 'completed'.
        for request in requests:
            self.changeStatusAndCheck(request, 'assignment-approved')
        for request in requests:
            self.jsonSender.put("assignment?team=%s&requestName=%s" % (teamName, request))
        for status in ['acquired',
                       'running-open', 'running-closed',
                       'completed']:
            for request in requests:
                self.changeStatusAndCheck(request, status)
        # Cascade closeout from the root: the entire tree must move to
        # 'closed-out' while the ACDC filesets are still present.
        self.jsonSender.post('closeout?requestName=%s&cascade=True' % originalRequest)
        svc = CouchService(url = self.testInit.couchUrl,
                                  database = "%s_acdc" % self.couchDBName)

        owner = svc.newOwner(groupName, userName)
        for request in requests:
            result = self.jsonSender.get('request/%s' % request)
            self.assertEqual(result[0]['RequestStatus'], 'closed-out')
            testCollection = CouchCollection(database = self.testInit.couchDbName,
                                             url = self.testInit.couchUrl,
                                             name = request)
            testCollection.setOwner(owner)
            testCollection.populate()
            self.assertNotEqual(len(testCollection["filesets"]), 0)

        # Cascade announce: every request flips to 'announced' and its
        # ACDC filesets must be gone.
        self.jsonSender.post('announce?requestName=%s&cascade=True' % originalRequest)
        for request in requests:

            result = self.jsonSender.get('request/%s' % request)
            self.assertEqual(result[0]['RequestStatus'], 'announced')
            testCollection = CouchCollection(database = self.testInit.couchDbName,
                                             url = self.testInit.couchUrl,
                                             name = request)
            testCollection.setOwner(owner)
            testCollection.populate()
            self.assertEqual(len(testCollection["filesets"]), 0)
Ejemplo n.º 22
0
    def populateCouchDB(self):
        """
        _populateCouchDB_

        Populate the ACDC records
        """
        couchUrl = self.testInit.couchUrl
        couchDb = self.testInit.couchDbName
        svc = CouchService(url=couchUrl, database=couchDb)

        # Three collections share the name "Thunderstruck"; one stands
        # apart as "Struckthunder".
        names = ("Thunderstruck", "Struckthunder",
                 "Thunderstruck", "Thunderstruck")
        collections = [CouchCollection(database=couchDb, url=couchUrl,
                                       name=collName)
                       for collName in names]
        testCollectionA, testCollectionB, testCollectionC, testCollectionD = collections

        # Filesets A, B and C go into their same-letter collections while
        # fileset D is attached to collection C.
        targets = (testCollectionA, testCollectionB,
                   testCollectionC, testCollectionC)
        filesets = []
        for label, target in zip("ABCD", targets):
            fs = CouchFileset(database=couchDb, url=couchUrl,
                              name="TestFileset%s" % label)
            target.addFileset(fs)
            filesets.append(fs)

        # Five files with random sizes and event counts, shared by every
        # fileset.
        testFiles = [File(lfn=makeUUID(),
                          size=random.randint(1024, 4096),
                          events=random.randint(1024, 4096))
                     for _ in range(5)]

        # Pause between inserts -- presumably so each fileset lands with a
        # distinct timestamp; no pause after the last one.
        for fs, pause in zip(filesets, (1, 1, 2, 0)):
            fs.add(testFiles)
            if pause:
                time.sleep(pause)
Ejemplo n.º 23
0
def defineRequests(workload, requestInfo,
                   acdcCouchUrl, acdcCouchDb,
                   requestor, group,
                   dbsUrl,
                   fakeLocation,
                   datasetInformation = None):
    """
    _defineRequests_

    This is the ultimate function: it creates JSONs for the appropriate
    resubmission requests that can be fed into the reqmgr.py script and it
    assembles ACDC records that are uploaded to the ACDC database.

    :param workload: original WMWorkload object to recover
    :param requestInfo: dict with the original request parameters
        (TimePerEvent, Memory, Teams, ProcessingVersion, ...)
    :param acdcCouchUrl: base URL of the ACDC couch server
    :param acdcCouchDb: name of the ACDC couch database
    :param requestor: requestor name stamped into the created requests
    :param group: group name stamped into the created requests
    :param dbsUrl: DBS instance used to load file/run/lumi information
    :param fakeLocation: forwarded to getFiles for the input dataset
    :param datasetInformation: optional pre-loaded {dataset: {lfn: info}}
        map; when None it is loaded from DBS
    """
    # First retrieve the run and block lists and load
    # the information of all datasets
    logging.debug("Original request info:\n%s", requestInfo)
    topTask = workload.getTopLevelTask()[0]
    runWhitelist = topTask.inputRunWhitelist()
    runBlacklist = topTask.inputRunBlacklist()
    blockWhitelist = topTask.inputBlockWhitelist()
    blockBlacklist = topTask.inputBlockBlacklist()
    inputDataset = workload.listInputDatasets()[0]
    outputModules = getOutputModules(workload)
    if datasetInformation is None:
        datasetInformation = {}
        logging.info("Loading DBS information for the datasets...")
        datasetInformation[inputDataset] = getFiles(inputDataset, runBlacklist, runWhitelist,
                                                    blockBlacklist, blockWhitelist, dbsUrl, fakeLocation=fakeLocation)
        for dataset in workload.listOutputDatasets():
            datasetInformation[dataset] = getFiles(dataset, runBlacklist, runWhitelist, blockBlacklist, blockWhitelist, dbsUrl)
        logging.info("Finished loading DBS information for the datasets...")

    # Now get the information about the datasets and tasks
    nodes, edges = buildDatasetTree(workload)
    logging.info("Dataset tree built...")
    for k,v in nodes.items():
        logging.debug("%s : %s" % (k,v))
    for k,v in edges.items():
        logging.debug("%s : %s" % (k,v))
    # Load the difference information between input and outputs
    differenceInformation = buildDifferenceMap(workload, datasetInformation)
    logging.info("Difference map processed...")
    logging.debug("%s" % str(differenceInformation))
    # Define an object that will hold the potential requests
    requests = []
    logging.info("Now definining the required requests...")
    # First generate requests for the datasets with children, that way we can
    # shoot the requests with skims in single requests.
    # Iterate over a snapshot of the keys: entries are deleted from
    # differenceInformation inside the loop body.
    for dataset in list(differenceInformation.keys()):
        if dataset not in nodes:
            continue
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        taskToRecover = edges[(inputDataset, dataset)]['task']
        outputModulesToRecover = edges[(inputDataset,dataset)]['outMod']
        # Intersect the parent's missing lumis with every child's so a
        # single request can recover parent and children together.
        intersectionDiff = {}
        for childDataset in nodes[dataset]:
            # NOTE(review): assumes every child dataset has an entry in
            # differenceInformation -- confirm buildDifferenceMap
            # guarantees this.
            childDiffLumis = differenceInformation[childDataset]
            matchAvailable = False
            for run in diffedLumis:
                if run in childDiffLumis:
                    for lumi in diffedLumis[run]:
                        if lumi in childDiffLumis[run]:
                            matchAvailable = True
                            break
            if matchAvailable:
                outputModulesToRecover.extend(edges[(dataset, childDataset)]['outMod'])
                datasetsToRecover.append(childDataset)
            for run in diffedLumis:
                if run in childDiffLumis:
                    if run not in intersectionDiff:
                        intersectionDiff[run] = set()
                        intersectionDiff[run] = diffedLumis[run] & childDiffLumis[run]
                    else:
                        intersectionDiff[run] &= diffedLumis[run] & childDiffLumis[run]
                else:
                    intersectionDiff[run] = set()
        # Drop runs whose lumi intersection came up empty. Iterate over a
        # snapshot of the keys: deleting from a dict while iterating it
        # raises "RuntimeError: dictionary changed size during iteration".
        for run in list(intersectionDiff.keys()):
            if not intersectionDiff[run]:
                del intersectionDiff[run]
        if not intersectionDiff:
            # Can't create request for this dataset + children
            continue
        # Remove the lumis covered by the combined request from the
        # individual difference maps.
        for run in intersectionDiff:
            for childDataset in nodes[dataset]:
                childDiffLumis = differenceInformation[childDataset]
                if run in childDiffLumis:
                    childDiffLumis[run] -= intersectionDiff[run]
                    if not childDiffLumis[run]:
                        del childDiffLumis[run]
            diffedLumis[run] -= intersectionDiff[run]
            if not diffedLumis[run]:
                del diffedLumis[run]
        if not diffedLumis:
            del differenceInformation[dataset]
        for childDataset in nodes[dataset]:
            if not differenceInformation[childDataset]:
                del differenceInformation[childDataset]

        requestObject = {'task' : taskToRecover,
                         'input' : inputDataset,
                         'lumis' : intersectionDiff,
                         'outMod' : outputModulesToRecover,
                         'outputs' : datasetsToRecover}
        requests.append(requestObject)
    # Now go through all the output datasets, creating a single request for
    # each
    for dataset in differenceInformation:
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        if (inputDataset, dataset) in edges:
            taskToRecover = edges[(inputDataset, dataset)]['task']
            outputModulesToRecover = edges[(inputDataset,dataset)]['outMod']
            parentDataset = inputDataset
        else:
            # NOTE(review): if no parent is found, taskToRecover and
            # parentDataset keep their values from the previous iteration
            # (or are unbound on the first one) -- verify every dataset is
            # reachable through nodes/edges.
            for parentDataset in nodes:
                if dataset in nodes[parentDataset]:
                    taskToRecover = edges[(parentDataset, dataset)]['task']
                    outputModulesToRecover = edges[(parentDataset,dataset)]['outMod']
                    break
        requestObject = {'task' : taskToRecover,
                         'input' : parentDataset,
                         'lumis' : diffedLumis,
                         'outMod' : outputModulesToRecover,
                         'outputs' : datasetsToRecover}
        requests.append(requestObject)

    logging.info("About to upload ACDC records to: %s/%s" % (acdcCouchUrl, acdcCouchDb))
    pprint(requests)
    # With the request objects we need to build ACDC records and
    # request JSONs
    for idx, requestObject in enumerate(requests):
        collectionName = '%s_%s' % (workload.name(), str(uuid.uuid1()))
        filesetName = requestObject['task']
        collection = CouchCollection(**{"url" : acdcCouchUrl,
                                      "database" : acdcCouchDb,
                                      "name" : collectionName})
        owner = makeUser(workload.getOwner()['group'], workload.getOwner()['name'], acdcCouchUrl, acdcCouchDb)
        collection.setOwner(owner)
        files = 0
        lumis = 0
        # Build one ACDC file record per input file that carries any of
        # the lumis this request must recover.
        for lfn in datasetInformation[requestObject['input']]:
            fileInfo = datasetInformation[requestObject['input']][lfn]
            fileRuns = {}
            for run in fileInfo['runs']:
                if run in requestObject['lumis']:
                    for lumi in fileInfo['runs'][run][0]:
                        if lumi in requestObject['lumis'][run]:
                            if run not in fileRuns:
                                fileRuns[run] = []
                            fileRuns[run].append(lumi)
                            lumis += 1
            if fileRuns:
                files += 1
                fileset = CouchFileset(**{"url" : acdcCouchUrl,
                                        "database" : acdcCouchDb,
                                        "name" : filesetName})
                fileset.setCollection(collection)
                acdcRuns = []
                for run in fileRuns:
                    runObject = {}
                    runObject['run_number'] = int(run)
                    runObject['lumis'] = fileRuns[run]
                    acdcRuns.append(runObject)
                acdcFile = {"lfn" : lfn,
                            "first_event" : 0,
                            "last_event" : 0,
                            "checksums" : {},
                            "size" : fileInfo["size"],
                            "events" : fileInfo["events"],
                            "merged" : 1,
                            "parents" : fileInfo["parents"],
                            "locations" : fileInfo["locations"],
                            "runs" : acdcRuns
                            }
                fileset.makeFilelist({lfn : acdcFile})

        # Put the creation parameters
        creationDict = jsonBlob["createRequest"]
        creationDict["OriginalRequestName"] = str(workload.name())
        creationDict["InitialTaskPath"] = requestObject['task']
        creationDict["CollectionName"] = collectionName
        creationDict["IgnoredOutputModules"] = list(set(outputModules) - set(requestObject['outMod']))
        creationDict["ACDCServer"] = acdcCouchUrl
        creationDict["ACDCDatabase"] = acdcCouchDb
        # NOTE(review): [:-18] strips a fixed-length tail from the workload
        # name (presumably a timestamp suffix) -- confirm.
        creationDict["RequestString"] = "recovery-%d-%s" % (idx, workload.name()[:-18])
        creationDict["Requestor"] = requestor
        creationDict["Group"] = group
        creationDict["TimePerEvent"] = requestInfo['TimePerEvent']
        creationDict["Memory"] = requestInfo['Memory']
        creationDict["SizePerEvent"] = requestInfo['SizePerEvent']
        creationDict["PrepID"] = requestInfo.get('PrepID')
        creationDict["Campaign"] = requestInfo.get('Campaign')

        # Assign parameters
        assignDict = jsonBlob["assignRequest"]
        team = requestInfo['Teams'][0]
        processingString = requestInfo['ProcessingString']
        processingVersion = requestInfo['ProcessingVersion']
        acqEra = requestInfo['AcquisitionEra']
        mergedLFNBase = requestInfo['MergedLFNBase']
        unmergedLFNBase = requestInfo['UnmergedLFNBase']
        topTask = workload.getTopLevelTask()[0]
        siteWhitelist = topTask.siteWhitelist()
        assignDict["SiteWhitelist"] = siteWhitelist
        assignDict["MergedLFNBase"] = mergedLFNBase
        assignDict["UnmergedLFNBase"] = unmergedLFNBase
        assignDict["AcquisitionEra"] = acqEra
        assignDict["Team"] = team
        # ProcessingVersion is either a bare integer or a string shaped
        # like "<processingString>-v<N>"; split the pieces apart in the
        # latter case.
        try:
            int(processingVersion)
            assignDict["ProcessingVersion"] = int(processingVersion)
            if processingString is not None and processingString != 'None':
                assignDict["ProcessingString"] = processingString
        except Exception:
            tokens = processingVersion.split('-')
            assignDict["ProcessingVersion"] = int(tokens[-1][1:])
            assignDict["ProcessingString"] = ('-').join(tokens[:-1])

        # Dump the filled-in template; 'with' guarantees the handle is
        # closed even if json.dump raises.
        with open('%s.json' % creationDict["RequestString"], 'w') as fileHandle:
            json.dump(jsonBlob, fileHandle)
        logging.info("Created JSON %s for recovery of %s" % ('%s.json' % creationDict["RequestString"],
                                                             requestObject['outputs']))
        logging.info("This will recover %d lumis in %d files" % (lumis, files))
Ejemplo n.º 24
0
    def populateCouchDB(self):
        """
        _populateCouchDB_

        Populate the ACDC records
        """
        couchUrl = self.testInit.couchUrl
        couchDb = self.testInit.couchDbName
        svc = CouchService(url=couchUrl, database=couchDb)

        ownerA = svc.newOwner("somegroup", "someuserA")
        ownerB = svc.newOwner("somegroup", "someuserB")

        def newCollection(collName, owner):
            # Build a collection and attach its owner in one step.
            coll = CouchCollection(database=couchDb, url=couchUrl, name=collName)
            coll.setOwner(owner)
            return coll

        # A and B belong to userA, C and D to userB; A, C and D share the
        # "Thunderstruck" name.
        testCollectionA = newCollection("Thunderstruck", ownerA)
        testCollectionB = newCollection("Struckthunder", ownerA)
        testCollectionC = newCollection("Thunderstruck", ownerB)
        testCollectionD = newCollection("Thunderstruck", ownerB)

        # Filesets A, B and C go into their same-letter collections while
        # fileset D is attached to collection C.
        targets = (testCollectionA, testCollectionB,
                   testCollectionC, testCollectionC)
        filesets = []
        for label, target in zip("ABCD", targets):
            fs = CouchFileset(database=couchDb, url=couchUrl,
                              name="TestFileset%s" % label)
            target.addFileset(fs)
            filesets.append(fs)

        # Five files with random sizes and event counts, shared by every
        # fileset.
        testFiles = [File(lfn=makeUUID(),
                          size=random.randint(1024, 4096),
                          events=random.randint(1024, 4096))
                     for _ in range(5)]

        # Pause between inserts -- presumably so each fileset lands with a
        # distinct timestamp; no pause after the last one.
        for fs, pause in zip(filesets, (1, 1, 2, 0)):
            fs.add(testFiles)
            if pause:
                time.sleep(pause)
Ejemplo n.º 25
0
    def testListCollectionsFilesets(self):
        """
        _testListCollectionsFilesets_

        Verify that collections and filesets in ACDC can be listed.
        """
        couchUrl = self.testInit.couchUrl
        couchDb = self.testInit.couchDbName
        svc = CouchService(url=couchUrl, database=couchDb)

        # Collections A, C and D share the name "Thunderstruck"; B stands
        # apart as "Struckthunder".
        names = ("Thunderstruck", "Struckthunder",
                 "Thunderstruck", "Thunderstruck")
        collections = [CouchCollection(database=couchDb, url=couchUrl,
                                       name=collName)
                       for collName in names]
        testCollectionA, testCollectionB, testCollectionC, testCollectionD = collections

        # Filesets A, B and C go into their same-letter collections while
        # fileset D is attached to collection C.
        targets = (testCollectionA, testCollectionB,
                   testCollectionC, testCollectionC)
        filesets = []
        for label, target in zip("ABCD", targets):
            fs = CouchFileset(database=couchDb, url=couchUrl,
                              name="TestFileset%s" % label)
            target.addFileset(fs)
            filesets.append(fs)

        # Five files with random sizes and event counts for every fileset.
        testFiles = [File(lfn=makeUUID(),
                          size=random.randint(1024, 4096),
                          events=random.randint(1024, 4096))
                     for _ in range(5)]

        for fs in filesets:
            fs.add(testFiles)

        # Listing filesets for collection D is expected to yield exactly
        # the filesets A, C and D (and not B's), each appearing once.
        expected = set(["TestFilesetA", "TestFilesetC", "TestFilesetD"])
        for fileset in svc.listFilesets(testCollectionD):
            filesetName = fileset["name"]
            self.assertTrue(filesetName in expected,
                            "Error: Missing fileset.")
            expected.discard(filesetName)
        self.assertEqual(len(expected), 0,
                         "Error: Missing filesets.")

        return
Ejemplo n.º 26
0
    def testDropCount(self):
        """
        _testDropCount_

        Verify that dropping a fileset and counting the files in a fileset works
        correctly.
        """
        couchUrl = self.testInit.couchUrl
        couchDb = self.testInit.couchDbName

        testCollectionA = CouchCollection(database=couchDb, url=couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=couchDb, url=couchUrl,
                                          name="StruckThunder")

        # Five files with random sizes and event counts, shared by every
        # fileset.
        testFiles = [File(lfn=makeUUID(),
                          size=random.randint(1024, 4096),
                          events=random.randint(1024, 4096))
                     for _ in range(5)]

        # Fileset A goes into "Thunderstruck"; B and C both go into
        # "StruckThunder".
        filesets = [CouchFileset(database=couchDb, url=couchUrl,
                                 name="TestFileset%s" % label)
                    for label in "ABC"]
        testFilesetA, testFilesetB, testFilesetC = filesets
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        for fs in filesets:
            fs.add(testFiles)

        # Drop one of the two filesets in "StruckThunder".
        testFilesetC.drop()

        # Reload "StruckThunder" from couch; only fileset B should remain,
        # still holding its five files.
        testCollectionC = CouchCollection(database=couchDb, url=couchUrl,
                                          name="StruckThunder")
        testCollectionC.populate()

        self.assertEqual(
            len(testCollectionC["filesets"]), 1,
            "Error: There should be one fileset in this collection.")
        self.assertEqual(testCollectionC["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")

        # "Thunderstruck" must be untouched by the drop.
        testCollectionD = CouchCollection(database=couchDb, url=couchUrl,
                                          name="Thunderstruck")
        testCollectionD.populate()

        self.assertEqual(
            len(testCollectionD["filesets"]), 1,
            "Error: There should be one fileset in this collection.")
        self.assertEqual(testCollectionD["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")
        return
Ejemplo n.º 27
0
    def testL_CascadeCloseOutAnnnouncement(self):
        """
        _testL_CascadeCloseOutAnnouncement_

        Test the cascade closeout REST call, also
        check that when announced a request deletes all ACDC records in the system.
        """
        # NOTE(review): method name typo ("Annnouncement") kept as-is;
        # renaming would change the test identifier.
        userName     = '******'
        groupName    = 'Li'
        teamName     = 'Tang'
        schema       = utils.getAndSetupSchema(self,
                                               userName = userName,
                                               groupName = groupName,
                                               teamName = teamName)
        configID = self.createConfig()
        schema["ConfigCacheID"] = configID
        schema["CouchDBName"] = self.couchDBName
        schema["CouchURL"]    = os.environ.get("COUCHURL")

        # Create the root request and populate ACDC records for it.
        result = self.jsonSender.put("request", schema)[0]
        originalRequest = result['RequestName']
        self.setupACDCDatabase(originalRequest, "/%s/DataProcessing" % originalRequest,
                               result['Requestor'], result['Group'])
        # Build a tree of resubmission requests, nReq children per node
        # down to the given depth, each with its own ACDC records.
        depth = 2
        nReq = 3
        requests = [originalRequest]
        def createChildrenRequest(parentRequest, i, nReq):
            # Recursively create one resubmission of parentRequest and,
            # while i > 0, nReq further resubmissions below it.
            createdRequests = []
            resubSchema = utils.getResubmissionSchema(parentRequest, "/%s/DataProcessing" % parentRequest,
                                                      groupName, userName)
            result = self.jsonSender.put("request", resubSchema)[0]
            requestName = result['RequestName']
            self.setupACDCDatabase(requestName, "/%s/DataProcessing" % requestName, result['Requestor'], result['Group'])
            createdRequests.append(requestName)
            if i:
                for _ in range(nReq):
                    createdRequests.extend(createChildrenRequest(requestName, i - 1, nReq))
            return createdRequests
        requests.extend(createChildrenRequest(originalRequest, depth, nReq))
        # Drive every request through the state machine to 'completed'.
        for request in requests:
            self.changeStatusAndCheck(request, 'assignment-approved')
        for request in requests:
            self.jsonSender.put("assignment?team=%s&requestName=%s" % (teamName, request))
        for status in ['acquired',
                       'running-open', 'running-closed',
                       'completed']:
            for request in requests:
                self.changeStatusAndCheck(request, status)
        # Cascade closeout from the root: the whole tree moves to
        # 'closed-out' and ACDC filesets must still exist at this point.
        self.jsonSender.post('closeout?requestName=%s&cascade=True' % originalRequest)
        svc = CouchService(url = self.testInit.couchUrl,
                                  database = "%s_acdc" % self.couchDBName)

        owner = svc.newOwner(groupName, userName)
        for request in requests:
            result = self.jsonSender.get('request/%s' % request)
            self.assertEqual(result[0]['RequestStatus'], 'closed-out')
            testCollection = CouchCollection(database = self.testInit.couchDbName,
                                             url = self.testInit.couchUrl,
                                             name = request)
            testCollection.setOwner(owner)
            testCollection.populate()
            self.assertNotEqual(len(testCollection["filesets"]), 0)

        # Cascade announce: every request becomes 'announced' and its
        # ACDC filesets must have been deleted.
        self.jsonSender.post('announce?requestName=%s&cascade=True' % originalRequest)
        for request in requests:

            result = self.jsonSender.get('request/%s' % request)
            self.assertEqual(result[0]['RequestStatus'], 'announced')
            testCollection = CouchCollection(database = self.testInit.couchDbName,
                                             url = self.testInit.couchUrl,
                                             name = request)
            testCollection.setOwner(owner)
            testCollection.populate()
            self.assertEqual(len(testCollection["filesets"]), 0)
Ejemplo n.º 28
0
def defineRequests(workload,
                   requestInfo,
                   acdcCouchUrl,
                   acdcCouchDb,
                   requestor,
                   group,
                   dbsUrl,
                   datasetInformation=None):
    """
    _defineRequests_

    This is the ultimate function,
    it will create JSONs for the appropriate resubmission requests
    that can be fed into the reqmgr.py script and it will assemble
    acdc records that can be uploaded to the database.

    :param workload: workload object of the original request
    :param requestInfo: request metadata dict; must contain a 'teams' list
    :param acdcCouchUrl: URL of the ACDC couch server
    :param acdcCouchDb: name of the ACDC couch database
    :param requestor: requestor name stamped on the new requests
    :param group: group name stamped on the new requests
    :param dbsUrl: DBS URL used to load dataset file information
    :param datasetInformation: optional pre-loaded {dataset: {lfn: fileInfo}}
                               map; when None it is fetched from DBS.
    """
    # First retrieve the run and block lists and load
    # the information of all datasets
    topTask = workload.getTopLevelTask()[0]
    runWhitelist = topTask.inputRunWhitelist()
    runBlacklist = topTask.inputRunBlacklist()
    blockWhitelist = topTask.inputBlockWhitelist()
    blockBlacklist = topTask.inputBlockBlacklist()
    inputDataset = workload.listInputDatasets()[0]
    outputModules = getOutputModules(workload)
    if datasetInformation is None:
        datasetInformation = {}
        logging.info("Loading DBS information for the datasets...")
        datasetInformation[inputDataset] = getFiles(inputDataset, runBlacklist,
                                                    runWhitelist,
                                                    blockBlacklist,
                                                    blockWhitelist, dbsUrl)
        for dataset in workload.listOutputDatasets():
            datasetInformation[dataset] = getFiles(dataset, runBlacklist,
                                                   runWhitelist,
                                                   blockBlacklist,
                                                   blockWhitelist, dbsUrl)
        logging.info("Finished loading DBS information for the datasets...")
    # Now get the information about the datasets and tasks
    nodes, edges = buildDatasetTree(workload)
    logging.info("Dataset tree built...")
    for k, v in nodes.items():
        logging.debug("%s : %s" % (k, v))
    for k, v in edges.items():
        logging.debug("%s : %s" % (k, v))
    # Load the difference information between input and outputs
    differenceInformation = buildDifferenceMap(workload, datasetInformation)
    logging.info("Difference map processed...")
    logging.debug("%s" % str(differenceInformation))
    # Define an object that will hold the potential requests
    requests = []
    logging.info("Now definining the required requests...")
    # First generate requests for the datasets with children, that way we can
    # shoot the requests with skims in single requests.
    # Iterate over a snapshot of the keys: entries are deleted from
    # differenceInformation inside this loop, which would otherwise raise a
    # RuntimeError (dict changed size during iteration) on Python 3.
    for dataset in list(differenceInformation.keys()):
        if dataset not in nodes:
            continue
        if dataset not in differenceInformation:
            # This entry was already deleted while handling an earlier
            # parent dataset; nothing left to recover for it.
            continue
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        taskToRecover = edges[(inputDataset, dataset)]['task']
        outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
        # Intersect the missing lumis of this dataset with those of all of
        # its children, run by run.
        intersectionDiff = {}
        for childDataset in nodes[dataset]:
            childDiffLumis = differenceInformation[childDataset]
            matchAvailable = False
            for run in diffedLumis:
                if run in childDiffLumis:
                    for lumi in diffedLumis[run]:
                        if lumi in childDiffLumis[run]:
                            matchAvailable = True
                            break
            if matchAvailable:
                # The child shares at least one missing lumi, recover it in
                # the same request as the parent.
                outputModulesToRecover.extend(edges[(dataset,
                                                     childDataset)]['outMod'])
                datasetsToRecover.append(childDataset)
            for run in diffedLumis:
                if run in childDiffLumis:
                    if run not in intersectionDiff:
                        intersectionDiff[run] = \
                            diffedLumis[run] & childDiffLumis[run]
                    else:
                        intersectionDiff[run] &= \
                            diffedLumis[run] & childDiffLumis[run]
                else:
                    # Run missing in this child: the overall intersection
                    # for it is empty.
                    intersectionDiff[run] = set()
        # Drop runs whose intersection came out empty (snapshot the keys,
        # we delete while iterating).
        for run in list(intersectionDiff.keys()):
            if not intersectionDiff[run]:
                del intersectionDiff[run]
        if not intersectionDiff:
            # Can't create request for this dataset + children
            continue
        # Remove the lumis covered by this combined request from the
        # remaining difference information.
        for run in intersectionDiff:
            for childDataset in nodes[dataset]:
                childDiffLumis = differenceInformation[childDataset]
                if run in childDiffLumis:
                    childDiffLumis[run] -= intersectionDiff[run]
                    if not childDiffLumis[run]:
                        del childDiffLumis[run]
            diffedLumis[run] -= intersectionDiff[run]
            if not diffedLumis[run]:
                del diffedLumis[run]
        if not diffedLumis:
            del differenceInformation[dataset]
        for childDataset in nodes[dataset]:
            if not differenceInformation[childDataset]:
                del differenceInformation[childDataset]

        requestObject = {
            'task': taskToRecover,
            'input': inputDataset,
            'lumis': intersectionDiff,
            'outMod': outputModulesToRecover,
            'outputs': datasetsToRecover
        }
        requests.append(requestObject)
    # Now go through all the output datasets, creating a single request for
    # each
    for dataset in differenceInformation:
        datasetsToRecover = [dataset]
        diffedLumis = differenceInformation[dataset]
        if (inputDataset, dataset) in edges:
            taskToRecover = edges[(inputDataset, dataset)]['task']
            outputModulesToRecover = edges[(inputDataset, dataset)]['outMod']
            parentDataset = inputDataset
        else:
            # NOTE(review): if no parent is found in nodes, taskToRecover and
            # outputModulesToRecover keep their values from the previous
            # iteration (or are unbound on the first one) — the dataset tree
            # is presumably always complete; confirm against buildDatasetTree.
            for parentDataset in nodes:
                if dataset in nodes[parentDataset]:
                    taskToRecover = edges[(parentDataset, dataset)]['task']
                    outputModulesToRecover = edges[(parentDataset,
                                                    dataset)]['outMod']
                    break
        requestObject = {
            'task': taskToRecover,
            'input': parentDataset,
            'lumis': diffedLumis,
            'outMod': outputModulesToRecover,
            'outputs': datasetsToRecover
        }
        requests.append(requestObject)

    logging.info("About to upload ACDC records to: %s/%s" %
                 (acdcCouchUrl, acdcCouchDb))
    # With the request objects we need to build ACDC records and
    # request JSONs
    pprint(requests)
    for idx, requestObject in enumerate(requests):
        collectionName = '%s_%s' % (workload.name(), str(uuid.uuid1()))
        filesetName = requestObject['task']
        collection = CouchCollection(**{
            "url": acdcCouchUrl,
            "database": acdcCouchDb,
            "name": collectionName
        })
        owner = makeUser(workload.getOwner()['group'],
                         workload.getOwner()['name'], acdcCouchUrl,
                         acdcCouchDb)
        collection.setOwner(owner)
        files = 0
        lumis = 0
        # Build one ACDC file record per input file that overlaps the lumis
        # this request has to recover.
        for lfn in datasetInformation[requestObject['input']]:
            fileInfo = datasetInformation[requestObject['input']][lfn]
            fileRuns = {}
            for run in fileInfo['runs']:
                if run in requestObject['lumis']:
                    for lumi in fileInfo['runs'][run][0]:
                        if lumi in requestObject['lumis'][run]:
                            if run not in fileRuns:
                                fileRuns[run] = []
                            fileRuns[run].append(lumi)
                            lumis += 1
            if fileRuns:
                files += 1
                fileset = CouchFileset(
                    **{
                        "url": acdcCouchUrl,
                        "database": acdcCouchDb,
                        "name": filesetName
                    })
                fileset.setCollection(collection)
                acdcRuns = []
                for run in fileRuns:
                    runObject = {}
                    runObject['run_number'] = int(run)
                    runObject['lumis'] = fileRuns[run]
                    acdcRuns.append(runObject)
                acdcFile = {
                    "lfn": lfn,
                    "first_event": 0,
                    "last_event": 0,
                    "checksums": {},
                    "size": fileInfo["size"],
                    "events": fileInfo["events"],
                    "merged": 1,
                    "parents": fileInfo["parents"],
                    "locations": fileInfo["locations"],
                    "runs": acdcRuns
                }
                fileset.makeFilelist({lfn: acdcFile})

        # Put the creation parameters
        creationDict = jsonBlob["createRequest"]
        creationDict["OriginalRequestName"] = str(workload.name())
        creationDict["InitialTaskPath"] = requestObject['task']
        creationDict["CollectionName"] = collectionName
        creationDict["IgnoredOutputModules"] = list(
            set(outputModules) - set(requestObject['outMod']))
        creationDict["ACDCServer"] = acdcCouchUrl
        creationDict["ACDCDatabase"] = acdcCouchDb
        creationDict["RequestString"] = "recovery-%d-%s" % (
            idx, workload.name()[:-18])
        creationDict["Requestor"] = requestor
        creationDict["Group"] = group

        # Assign parameters
        assignDict = jsonBlob["assignRequest"]
        team = requestInfo['teams'][0]
        processingString = workload.getProcessingString()
        processingVersion = workload.getProcessingVersion()
        acqEra = workload.getAcquisitionEra()
        mergedLFNBase = workload.getMergedLFNBase()
        unmergedLFNBase = workload.getUnmergedLFNBase()
        topTask = workload.getTopLevelTask()[0]
        siteWhitelist = topTask.siteWhitelist()
        assignDict["SiteWhitelist"] = siteWhitelist
        assignDict["MergedLFNBase"] = mergedLFNBase
        assignDict["UnmergedLFNBase"] = unmergedLFNBase
        assignDict["AcquisitionEra"] = acqEra
        assignDict["Team"] = team
        try:
            # Plain integer processing version: keep it and carry the
            # processing string over unchanged.
            int(processingVersion)
            assignDict["ProcessingVersion"] = int(processingVersion)
            if processingString is not None and processingString != 'None':
                assignDict["ProcessingString"] = processingString
        except Exception:
            # Combined "<string>-vN" form: split the trailing version token.
            tokens = processingVersion.split('-')
            assignDict["ProcessingVersion"] = int(tokens[-1][1:])
            assignDict["ProcessingString"] = ('-').join(tokens[:-1])

        # Use a context manager so the JSON file is closed even if the
        # dump fails.
        requestFileName = '%s.json' % creationDict["RequestString"]
        with open(requestFileName, 'w') as fileHandle:
            json.dump(jsonBlob, fileHandle)
        logging.info("Created JSON %s for recovery of %s" %
                     (requestFileName,
                      requestObject['outputs']))
        logging.info("This will recover %d lumis in %d files" % (lumis, files))
Ejemplo n.º 29
0
    def testListCollectionsFilesets(self):
        """
        _testListCollectionsFilesets_

        Verify that collections and filesets in ACDC can be listed.
        """
        svc = CouchService(url = self.testInit.couchUrl,
                           database = self.testInit.couchDbName)

        ownerA = svc.newOwner("somegroup", "someuserA")
        ownerB = svc.newOwner("somegroup", "someuserB")

        def newCollection(name, owner):
            # Helper: build a collection in the test couch and assign its owner.
            coll = CouchCollection(database = self.testInit.couchDbName,
                                   url = self.testInit.couchUrl,
                                   name = name)
            coll.setOwner(owner)
            return coll

        def newFileset(name, parentCollection):
            # Helper: build a fileset and attach it to the given collection.
            fs = CouchFileset(database = self.testInit.couchDbName,
                              url = self.testInit.couchUrl,
                              name = name)
            parentCollection.addFileset(fs)
            return fs

        # Two collections for ownerA, two same-named collections for ownerB.
        collectionA = newCollection("Thunderstruck", ownerA)
        collectionB = newCollection("Struckthunder", ownerA)
        collectionC = newCollection("Thunderstruck", ownerB)
        collectionD = newCollection("Thunderstruck", ownerB)

        filesetA = newFileset("TestFilesetA", collectionA)
        filesetB = newFileset("TestFilesetB", collectionB)
        filesetC = newFileset("TestFilesetC", collectionC)
        # Both C and D filesets live in collectionC; collectionD gets none
        # directly but shares owner and name with collectionC.
        filesetD = newFileset("TestFilesetD", collectionC)

        sharedFiles = [File(lfn = makeUUID(),
                            size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
                       for _ in range(5)]
        for fs in (filesetA, filesetB, filesetC, filesetD):
            fs.add(sharedFiles)

        # Listing ownerA's collections must return both, exactly once each.
        goldenCollectionNames = ["Thunderstruck", "Struckthunder"]
        for collection in svc.listCollections(ownerA):
            self.assertTrue(collection["name"] in goldenCollectionNames,
                            "Error: Missing collection name.")
            goldenCollectionNames.remove(collection["name"])
        self.assertEqual(len(goldenCollectionNames), 0,
                         "Error: Missing collections.")

        # Listing filesets through collectionD must find C and D (same
        # owner/name as collectionC).
        goldenFilesetNames = ["TestFilesetC", "TestFilesetD"]
        for fileset in svc.listFilesets(collectionD):
            self.assertTrue(fileset["name"] in goldenFilesetNames,
                            "Error: Missing fileset.")
            goldenFilesetNames.remove(fileset["name"])
        self.assertEqual(len(goldenFilesetNames), 0,
                         "Error: Missing filesets.")

        return
Ejemplo n.º 30
0
    def testCreatePopulateDrop(self):
        """
        _testCreatePopulateDrop_

        Test creating, populating and dropping a collection.
        """
        testCollectionA = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "Thunderstruck")
        testCollectionB = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "StruckThunder")
        testCollectionA.setOwner(self.owner)
        testCollectionB.setOwner(self.owner)
        testCollectionA.create()
        testCollectionB.create()

        # There should be nothing in couch.  Documents are only added for
        # filesets and files.

        testFilesA = []
        for i in range(5):
            testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
            testFilesA.append(testFile)
        testFilesB = []
        for i in range(10):
            testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
            testFilesB.append(testFile)

        testFilesetA = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = "TestFilesetA")
        testFilesetB = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = "TestFilesetB")
        testFilesetC = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = "TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFilesA)
        testFilesetB.add(testFilesA)
        testFilesetC.add(testFilesA)
        testFilesetC.add(testFilesB)

        # Drop testCollectionA
        testCollectionA.drop()

        # Try to populate testFilesetA
        testCollectionC = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "ThunderStruck")
        testCollectionC.setOwner(self.owner)
        testCollectionC.populate()

        self.assertEqual(len(testCollectionC["filesets"]), 0,
                         "Error: There should be no filesets in this collect.")

        # Try to populate testFilesetB
        testCollectionD = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "StruckThunder")
        testCollectionD.setOwner(self.owner)
        testCollectionD.populate()

        for fileset in testCollectionD["filesets"]:
            # Copy testFilesA so that extending the expectation for
            # TestFilesetC does not mutate testFilesA itself.  The previous
            # aliasing (testFiles = testFilesA) silently grew testFilesA by
            # testFilesB whenever TestFilesetC was checked first, corrupting
            # the expected count for any fileset verified afterwards.
            testFiles = list(testFilesA)
            if fileset["name"] == "TestFilesetC":
                testFiles.extend(testFilesB)

            self.assertEqual(len(testFiles), len(fileset.files.keys()),
                             "Error: Wrong number of files in fileset.")
            for testFile in testFiles:
                self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                                "Error: File is missing.")
                self.assertEqual(testFile["events"],
                                 fileset.files[testFile["lfn"]]["events"],
                                 "Error: Wrong number of events.")
                self.assertEqual(testFile["size"],
                                 fileset.files[testFile["lfn"]]["size"],
                                 "Error: Wrong file size.")

        return
Ejemplo n.º 31
0
    def testCreatePopulateDrop(self):
        """
        _testCreatePopulateDrop_

        Test creating, populating and dropping a collection.
        """
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionA.setOwner(self.owner)
        testCollectionB.setOwner(self.owner)
        testCollectionA.create()
        testCollectionB.create()

        # There should be nothing in couch.  Documents are only added for
        # filesets and files.

        testFilesA = []
        for i in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesA.append(testFile)
        testFilesB = []
        for i in range(10):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesB.append(testFile)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFilesA)
        testFilesetB.add(testFilesA)
        testFilesetC.add(testFilesA)
        testFilesetC.add(testFilesB)

        # Drop testCollectionA
        testCollectionA.drop()

        # Try to populate testFilesetA
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="ThunderStruck")
        testCollectionC.setOwner(self.owner)
        testCollectionC.populate()

        self.assertEqual(
            len(testCollectionC["filesets"]), 0,
            "Error: There should be no filesets in this collect.")

        # Try to populate testFilesetB
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionD.setOwner(self.owner)
        testCollectionD.populate()

        for fileset in testCollectionD["filesets"]:
            # Copy testFilesA so that extending the expectation for
            # TestFilesetC does not mutate testFilesA itself.  The previous
            # aliasing (testFiles = testFilesA) silently grew testFilesA by
            # testFilesB whenever TestFilesetC was checked first, corrupting
            # the expected count for any fileset verified afterwards.
            testFiles = list(testFilesA)
            if fileset["name"] == "TestFilesetC":
                testFiles.extend(testFilesB)

            self.assertEqual(len(testFiles), len(fileset.files.keys()),
                             "Error: Wrong number of files in fileset.")
            for testFile in testFiles:
                self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                                "Error: File is missing.")
                self.assertEqual(testFile["events"],
                                 fileset.files[testFile["lfn"]]["events"],
                                 "Error: Wrong number of events.")
                self.assertEqual(testFile["size"],
                                 fileset.files[testFile["lfn"]]["size"],
                                 "Error: Wrong file size.")

        return
Ejemplo n.º 32
0
    def testDropCount(self):
        """
        _testDropCount_

        Verify that dropping a fileset and counting the files in a fileset works
        correctly.
        """
        dbName = self.testInit.couchDbName
        couchUrl = self.testInit.couchUrl

        collectionA = CouchCollection(database = dbName, url = couchUrl,
                                      name = "Thunderstruck")
        collectionB = CouchCollection(database = dbName, url = couchUrl,
                                      name = "StruckThunder")
        for collection in (collectionA, collectionB):
            collection.setOwner(self.owner)

        # Five random files shared by every fileset below.
        sharedFiles = [File(lfn = makeUUID(),
                            size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
                       for _ in range(5)]

        filesetA = CouchFileset(database = dbName, url = couchUrl,
                                name = "TestFilesetA")
        filesetB = CouchFileset(database = dbName, url = couchUrl,
                                name = "TestFilesetB")
        filesetC = CouchFileset(database = dbName, url = couchUrl,
                                name = "TestFilesetC")
        collectionA.addFileset(filesetA)
        collectionB.addFileset(filesetB)
        collectionB.addFileset(filesetC)
        for fileset in (filesetA, filesetB, filesetC):
            fileset.add(sharedFiles)

        # Dropping fileset C should leave collection B with only fileset B.
        filesetC.drop()

        populatedB = CouchCollection(database = dbName, url = couchUrl,
                                     name = "StruckThunder")
        populatedB.setOwner(self.owner)
        populatedB.populate()

        self.assertEqual(len(populatedB["filesets"]), 1,
                         "Error: There should be one fileset in this collection.")
        self.assertEqual(populatedB["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")

        # Collection A was untouched by the drop: still one fileset, 5 files.
        populatedA = CouchCollection(database = dbName, url = couchUrl,
                                     name = "Thunderstruck")
        populatedA.setOwner(self.owner)
        populatedA.populate()

        self.assertEqual(len(populatedA["filesets"]), 1,
                         "Error: There should be one fileset in this collection.")
        self.assertEqual(populatedA["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")
        return