def failedJobs(self, failedJobs, useMask = True):
        """
        _failedJobs_

        Record the input files of every failed job in ACDC.

        For each job a collection named after the job's workflow and a
        fileset named after the job's task are built, an owner is attached
        to the collection, and the job's input files are added to the
        fileset (together with the job mask when useMask is true).

        NOTE: every job must carry the non-standard 'task' and 'workflow'
        keys; a missing one is logged and raised as ACDCDCSException.
        'group' and 'owner' are optional and both fall back to "cmsdataops".
        """
        for failedJob in failedJobs:
            try:
                filesetName = failedJob['task']
                collectionName = failedJob['workflow']
            except KeyError as ex:
                msg =  "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            # Connection settings shared by the collection and the fileset.
            couchArgs = {"database": self.database, "url": self.url}

            collection = CouchCollection(name=collectionName,
                                         type=CollectionTypes.DataCollection,
                                         **couchArgs)
            groupName = failedJob.get("group", "cmsdataops")
            userName = failedJob.get("owner", "cmsdataops")
            collection.setOwner(self.newOwner(groupName, userName))

            taskFileset = CouchFileset(name=filesetName, **couchArgs)
            collection.addFileset(taskFileset)

            # The mask is only looked up (and only required) when requested.
            addArgs = {"files": failedJob['input_files']}
            if useMask:
                addArgs["mask"] = failedJob['mask']
            taskFileset.add(**addArgs)

        return
Example #2
0
    def setupACDCDatabase(self, collectionName, taskPath,
                          user, group):
        """
        _setupACDCDatabase_

        Fill an ACDC database with five dummy file records that belong to
        the given collection name and task path, owned by the given
        user and group.
        """
        service = CouchService(url=self.testInit.couchUrl,
                               database="%s_acdc" % self.couchDBName)
        collectionOwner = service.newOwner(group, user)

        # Collection and fileset share the same couch connection settings.
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        collection = CouchCollection(name=collectionName, **couchArgs)
        collection.setOwner(collectionOwner)
        fileset = CouchFileset(name=taskPath, **couchArgs)
        collection.addFileset(fileset)

        # Five files with random identifiers, sizes and event counts.
        dummyFiles = [File(lfn=makeUUID(),
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
                      for _ in range(5)]

        fileset.add(dummyFiles)
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Record the input files of every failed job in ACDC.

        For each job a collection named after the job's workflow and a
        fileset named after the job's task are built, and the job's input
        files are added to the fileset (together with the job mask when
        useMask is true).

        NOTE: every job must carry the non-standard 'task' and 'workflow'
        keys; a missing one is logged and raised as ACDCDCSException.
        """
        for failedJob in failedJobs:
            try:
                filesetName = failedJob['task']
                collectionName = failedJob['workflow']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            # Connection settings shared by the collection and the fileset.
            couchArgs = {"database": self.database, "url": self.url}

            collection = CouchCollection(name=collectionName,
                                         type=CollectionTypes.DataCollection,
                                         **couchArgs)
            taskFileset = CouchFileset(name=filesetName, **couchArgs)
            collection.addFileset(taskFileset)

            # The mask is only looked up (and only required) when requested.
            addArgs = {"files": failedJob['input_files']}
            if useMask:
                addArgs["mask"] = failedJob['mask']
            taskFileset.add(**addArgs)

        return
Example #4
0
    def testListFiles(self):
        """
        _testListFiles_

        Add five random files to a fileset, one at a time, and check that
        listFiles() only yields those files, with matching event counts
        and sizes.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        collection = CouchCollection(name="Thunderstruck", **couchArgs)
        collection.setOwner(self.owner)
        fileset = CouchFileset(name="TestFileset", **couchArgs)
        collection.addFileset(fileset)

        # Remember every file by LFN so the listing can be cross-checked.
        expectedByLfn = {}
        for _ in range(5):
            lfn = makeUUID()
            newFile = File(lfn=lfn,
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
            expectedByLfn[lfn] = newFile
            fileset.add([newFile])

        for listedFile in fileset.listFiles():
            listedLfn = listedFile["lfn"]
            self.assertTrue(listedLfn in expectedByLfn,
                            "Error: File missing.")
            expectedFile = expectedByLfn[listedLfn]
            self.assertEqual(listedFile["events"], expectedFile["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(listedFile["size"], expectedFile["size"],
                             "Error: Wrong file size.")
        return
Example #5
0
    def testFileset(self):
        """
        _testFileset_

        Add five random files to an ACDC fileset, convert it with
        fileset() and check that every file of the converted fileset is
        one of the added files, with matching event count and size.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        collection = CouchCollection(name="Thunderstruck", **couchArgs)
        acdcFileset = CouchFileset(name="TestFileset", **couchArgs)
        collection.addFileset(acdcFileset)

        # Remember every file by LFN so the conversion can be cross-checked.
        expectedByLfn = {}
        for _ in range(5):
            lfn = makeUUID()
            newFile = File(lfn=lfn,
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
            expectedByLfn[lfn] = newFile
            acdcFileset.add([newFile])

        for convertedFile in acdcFileset.fileset().files:
            convertedLfn = convertedFile["lfn"]
            self.assertTrue(convertedLfn in expectedByLfn,
                            "Error: File missing.")
            expectedFile = expectedByLfn[convertedLfn]
            self.assertEqual(convertedFile["events"], expectedFile["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(convertedFile["size"], expectedFile["size"],
                             "Error: Wrong file size.")
        return
Example #6
0
    def setupACDCDatabase(self, collectionName, taskPath,
                          user, group):
        """
        _setupACDCDatabase_

        Fill an ACDC database with five dummy file records that belong to
        the given collection name and task path, owned by the given
        user and group.
        """
        service = CouchService(url=self.testInit.couchUrl,
                               database="%s_acdc" % self.couchDBName)
        collectionOwner = service.newOwner(group, user)

        # Collection and fileset share the same couch connection settings.
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        collection = CouchCollection(name=collectionName, **couchArgs)
        collection.setOwner(collectionOwner)
        fileset = CouchFileset(name=taskPath, **couchArgs)
        collection.addFileset(fileset)

        # Five files with random identifiers, sizes and event counts.
        dummyFiles = [File(lfn=makeUUID(),
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
                      for _ in range(5)]

        fileset.add(dummyFiles)
Example #7
0
    def failedJobs(self, failedJobs):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        For each job a collection named after the job's workflow and a
        fileset named after the job's task are built, an owner is attached
        to the collection, and the job's input files are added to the
        fileset together with the job mask.

        NOTE: every job must carry the non-standard 'task' and 'workflow'
        keys; a missing one is logged and raised as ACDCDCSException.
        'group' and 'owner' are optional and both fall back to "cmsdataops".
        'input_files' and 'mask' are read without a fallback.
        """
        for job in failedJobs:
            try:
                taskName = job['task']
                workflow = job['workflow']
            # "except X as name" is valid on Python 2.6+ and Python 3; the
            # old comma form is a syntax error on Python 3.
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

            coll = CouchCollection(database=self.database,
                                   url=self.url,
                                   name=workflow,
                                   type=CollectionTypes.DataCollection)
            # Group comes first, owner second, both defaulting to cmsdataops.
            owner = self.newOwner(job.get("group", "cmsdataops"),
                                  job.get("owner", "cmsdataops"))
            coll.setOwner(owner)
            fileset = CouchFileset(database=self.database,
                                   url=self.url,
                                   name=taskName)
            coll.addFileset(fileset)
            fileset.add(files=job['input_files'], mask=job['mask'])
Example #8
0
    def testFileset(self):
        """
        _testFileset_

        Add five random files to an owned ACDC fileset, convert it with
        fileset() and check that every file of the converted fileset is
        one of the added files, with matching event count and size.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        collection = CouchCollection(name="Thunderstruck", **couchArgs)
        collection.setOwner(self.owner)
        acdcFileset = CouchFileset(name="TestFileset", **couchArgs)
        collection.addFileset(acdcFileset)

        # Remember every file by LFN so the conversion can be cross-checked.
        expectedByLfn = {}
        for _ in range(5):
            lfn = makeUUID()
            newFile = File(lfn=lfn,
                           size=random.randint(1024, 4096),
                           events=random.randint(1024, 4096))
            expectedByLfn[lfn] = newFile
            acdcFileset.add([newFile])

        for convertedFile in acdcFileset.fileset().files:
            convertedLfn = convertedFile["lfn"]
            self.assertTrue(convertedLfn in expectedByLfn.keys(),
                            "Error: File missing.")
            expectedFile = expectedByLfn[convertedLfn]
            self.assertEqual(convertedFile["events"], expectedFile["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(convertedFile["size"], expectedFile["size"],
                             "Error: Wrong file size.")
        return
Example #9
0
    def testDropCount(self):
        """
        _testDropCount_

        Store the same five files in three filesets spread over two
        collections, drop one fileset, and check that each collection,
        once re-populated, holds exactly one fileset with five files.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        thunderstruck = CouchCollection(name="Thunderstruck", **couchArgs)
        struckThunder = CouchCollection(name="StruckThunder", **couchArgs)

        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        filesetA = CouchFileset(name="TestFilesetA", **couchArgs)
        filesetB = CouchFileset(name="TestFilesetB", **couchArgs)
        filesetC = CouchFileset(name="TestFilesetC", **couchArgs)

        # A goes into the first collection, B and C into the second one.
        thunderstruck.addFileset(filesetA)
        struckThunder.addFileset(filesetB)
        struckThunder.addFileset(filesetC)
        for populated in (filesetA, filesetB, filesetC):
            populated.add(sharedFiles)

        filesetC.drop()

        # Re-read both collections through fresh objects: the one that lost
        # fileset C first, then the untouched one.
        for collectionName in ("StruckThunder", "Thunderstruck"):
            reloaded = CouchCollection(name=collectionName, **couchArgs)
            reloaded.populate()

            self.assertEqual(len(reloaded["filesets"]), 1,
                             "Error: There should be one fileset in this collection.")
            self.assertEqual(reloaded["filesets"][0].fileCount(), 5,
                             "Error: Wrong number of files in fileset.")
        return
Example #10
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        The list is sorted in place by workflow and then by task, so that a
        collection is built once per workflow and a fileset once per
        (workflow, task) pair. Before the files of a job are added, their
        parentage is adjusted: merged files lose their parents, unmerged
        parents are replaced by what getMergedParents() returns, and
        "MCFakeFile" parents are dropped.

        NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
        A job without 'workflow' or 'task' is logged and raised as
        ACDCDCSException before anything is recorded.
        """
        # Validate first: the sort below uses itemgetter, which would raise a
        # bare KeyError and make the ACDCDCSException unreachable.
        for job in failedJobs:
            try:
                _ = (job['workflow'], job['task'])
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (
                    str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

        # One sort with workflow as primary key and task as secondary key
        # keeps all jobs of a (workflow, task) pair contiguous.
        failedJobs.sort(key=itemgetter('workflow', 'task'))

        # None sentinels (instead of "") so that an empty-string workflow or
        # task on the first job still creates its collection and fileset.
        previousWorkflow = None
        previousTask = None
        coll = None
        fileset = None
        for job in failedJobs:
            workflow = job['workflow']
            taskName = job['task']

            workflowChanged = coll is None or workflow != previousWorkflow
            if workflowChanged:
                coll = CouchCollection(database=self.database,
                                       url=self.url,
                                       name=workflow,
                                       type=CollectionTypes.DataCollection)
            # A fileset belongs to one collection, so a new workflow needs a
            # new fileset even when the task name happens to be the same.
            if workflowChanged or fileset is None or taskName != previousTask:
                fileset = CouchFileset(database=self.database,
                                       url=self.url,
                                       name=taskName)
            coll.addFileset(fileset)
            inputFiles = job['input_files']
            for fInfo in inputFiles:
                if int(fInfo["merged"]
                       ) == 1:  # Looks like Oracle and MySQL return diff type
                    fInfo["parents"] = []
                elif fInfo.get("parents", []):
                    firstParent = next(iter(fInfo["parents"]))
                    if "/store/unmerged/" in firstParent:
                        # parents and input files are unmerged files - need to find merged ascendant
                        fInfo["parents"] = list(
                            getMergedParents(fInfo["parents"]))
                    elif "MCFakeFile" in firstParent:
                        fInfo["parents"] = []
                    # other case, fInfo["parents"] all or merged parents
            if useMask:
                fileset.add(files=inputFiles, mask=job['mask'])
            else:
                fileset.add(files=inputFiles)

            previousWorkflow = workflow
            previousTask = taskName

        return
Example #11
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Given a list of failed jobs, sort them into Filesets and record them.

        The list is sorted in place by workflow and then by task, so that a
        collection is built once per workflow and a fileset once per
        (workflow, task) pair. Before the files of a job are added, their
        parentage is adjusted: merged files lose their parents, unmerged
        parents are replaced by what getMergedParents() returns, and
        "MCFakeFile" parents are dropped.

        NOTE: jobs must have a non-standard task, workflow attributes assigned to them.
        A job without 'workflow' or 'task' is logged and raised as
        ACDCDCSException before anything is recorded.
        """
        # Validate first: the sort below uses itemgetter, which would raise a
        # bare KeyError and make the ACDCDCSException unreachable.
        for job in failedJobs:
            try:
                _ = (job['workflow'], job['task'])
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % (str(ex))
                logging.error(msg)
                raise ACDCDCSException(msg)

        # One sort with workflow as primary key and task as secondary key
        # keeps all jobs of a (workflow, task) pair contiguous.
        failedJobs.sort(key=itemgetter('workflow', 'task'))

        # None sentinels (instead of "") so that an empty-string workflow or
        # task on the first job still creates its collection and fileset.
        previousWorkflow = None
        previousTask = None
        coll = None
        fileset = None
        for job in failedJobs:
            workflow = job['workflow']
            taskName = job['task']

            workflowChanged = coll is None or workflow != previousWorkflow
            if workflowChanged:
                coll = CouchCollection(database=self.database, url=self.url,
                                       name=workflow,
                                       type=CollectionTypes.DataCollection)
            # A fileset belongs to one collection, so a new workflow needs a
            # new fileset even when the task name happens to be the same.
            if workflowChanged or fileset is None or taskName != previousTask:
                fileset = CouchFileset(database=self.database, url=self.url,
                                       name=taskName)
            coll.addFileset(fileset)
            inputFiles = job['input_files']
            for fInfo in inputFiles:
                if int(fInfo["merged"]) == 1:  # Looks like Oracle and MySQL return diff type
                    fInfo["parents"] = []
                elif fInfo.get("parents", []):
                    firstParent = next(iter(fInfo["parents"]))
                    if "/store/unmerged/" in firstParent:
                        # parents and input files are unmerged files - need to find merged ascendant
                        fInfo["parents"] = list(getMergedParents(fInfo["parents"]))
                    elif "MCFakeFile" in firstParent:
                        fInfo["parents"] = []
                    # other case, fInfo["parents"] all or merged parents
            if useMask:
                fileset.add(files=inputFiles, mask=job['mask'])
            else:
                fileset.add(files=inputFiles)

            previousWorkflow = workflow
            previousTask = taskName

        return
Example #12
0
    def testListCollectionsFilesets(self):
        """
        _testListCollectionsFilesets_

        Create collections and filesets for two owners, then check the
        collection names listed for the first owner and the fileset names
        listed for a collection of the second owner.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}
        service = CouchService(**couchArgs)

        firstOwner = service.newOwner("somegroup", "someuserA")
        secondOwner = service.newOwner("somegroup", "someuserB")

        # Two collections for the first owner, two same-named ones for the second.
        collectionA = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionA.setOwner(firstOwner)
        collectionB = CouchCollection(name="Struckthunder", **couchArgs)
        collectionB.setOwner(firstOwner)
        collectionC = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionC.setOwner(secondOwner)
        collectionD = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionD.setOwner(secondOwner)

        # Filesets C and D both go into collection C.
        filesetA = CouchFileset(name="TestFilesetA", **couchArgs)
        collectionA.addFileset(filesetA)
        filesetB = CouchFileset(name="TestFilesetB", **couchArgs)
        collectionB.addFileset(filesetB)
        filesetC = CouchFileset(name="TestFilesetC", **couchArgs)
        collectionC.addFileset(filesetC)
        filesetD = CouchFileset(name="TestFilesetD", **couchArgs)
        collectionC.addFileset(filesetD)

        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        for populated in (filesetA, filesetB, filesetC, filesetD):
            populated.add(sharedFiles)

        # Every listed name must be expected, and none may be left over.
        pendingCollections = ["Thunderstruck", "Struckthunder"]
        for listedCollection in service.listCollections(firstOwner):
            self.assertTrue(listedCollection["name"] in pendingCollections,
                            "Error: Missing collection name.")
            pendingCollections.remove(listedCollection["name"])
        self.assertEqual(len(pendingCollections), 0, "Error: Missing collections.")

        pendingFilesets = ["TestFilesetC", "TestFilesetD"]
        for listedFileset in service.listFilesets(collectionD):
            self.assertTrue(listedFileset["name"] in pendingFilesets,
                            "Error: Missing fileset.")
            pendingFilesets.remove(listedFileset["name"])
        self.assertEqual(len(pendingFilesets), 0, "Error: Missing filesets.")

        return
Example #13
0
    def populateCouchDB(self):
        """
        _populateCouchDB_

        Create ACDC records: four collections split between two owners,
        four filesets, and the same five random files added to each
        fileset with pauses between the additions.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}
        service = CouchService(**couchArgs)

        firstOwner = service.newOwner("somegroup", "someuserA")
        secondOwner = service.newOwner("somegroup", "someuserB")

        # Two collections for the first owner, two same-named ones for the second.
        collectionA = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionA.setOwner(firstOwner)
        collectionB = CouchCollection(name="Struckthunder", **couchArgs)
        collectionB.setOwner(firstOwner)
        collectionC = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionC.setOwner(secondOwner)
        collectionD = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionD.setOwner(secondOwner)

        # Filesets C and D both go into collection C.
        filesetA = CouchFileset(name="TestFilesetA", **couchArgs)
        collectionA.addFileset(filesetA)
        filesetB = CouchFileset(name="TestFilesetB", **couchArgs)
        collectionB.addFileset(filesetB)
        filesetC = CouchFileset(name="TestFilesetC", **couchArgs)
        collectionC.addFileset(filesetC)
        filesetD = CouchFileset(name="TestFilesetD", **couchArgs)
        collectionC.addFileset(filesetD)

        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        # Pause after each of the first three additions (1s, 1s, 2s).
        for populated, pause in ((filesetA, 1), (filesetB, 1), (filesetC, 2)):
            populated.add(sharedFiles)
            time.sleep(pause)
        filesetD.add(sharedFiles)
Example #14
0
    def testListCollectionsFilesets(self):
        """
        _testListCollectionsFilesets_

        Create four collections and four filesets, fill every fileset with
        the same five files, and check the fileset names listed for a
        collection named "Thunderstruck".
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}
        service = CouchService(**couchArgs)

        collectionA = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionB = CouchCollection(name="Struckthunder", **couchArgs)
        collectionC = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionD = CouchCollection(name="Thunderstruck", **couchArgs)

        # Filesets C and D both go into collection C.
        filesetA = CouchFileset(name="TestFilesetA", **couchArgs)
        collectionA.addFileset(filesetA)
        filesetB = CouchFileset(name="TestFilesetB", **couchArgs)
        collectionB.addFileset(filesetB)
        filesetC = CouchFileset(name="TestFilesetC", **couchArgs)
        collectionC.addFileset(filesetC)
        filesetD = CouchFileset(name="TestFilesetD", **couchArgs)
        collectionC.addFileset(filesetD)

        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        for populated in (filesetA, filesetB, filesetC, filesetD):
            populated.add(sharedFiles)

        # Every listed name must be expected, and none may be left over.
        pendingFilesets = ["TestFilesetA", "TestFilesetC", "TestFilesetD"]
        for listedFileset in service.listFilesets(collectionD):
            self.assertTrue(listedFileset["name"] in pendingFilesets,
                            "Error: Missing fileset.")
            pendingFilesets.remove(listedFileset["name"])
        self.assertEqual(len(pendingFilesets), 0,
                         "Error: Missing filesets.")

        return
Example #15
0
    def populateCouchDB(self):
        """
        _populateCouchDB_

        Create ACDC records: four collections split between two owners,
        four filesets, and the same five random files added to each
        fileset with pauses between the additions.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}
        service = CouchService(**couchArgs)

        firstOwner = service.newOwner("somegroup", "someuserA")
        secondOwner = service.newOwner("somegroup", "someuserB")

        # Two collections for the first owner, two same-named ones for the second.
        collectionA = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionA.setOwner(firstOwner)
        collectionB = CouchCollection(name="Struckthunder", **couchArgs)
        collectionB.setOwner(firstOwner)
        collectionC = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionC.setOwner(secondOwner)
        collectionD = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionD.setOwner(secondOwner)

        # Filesets C and D both go into collection C.
        filesetA = CouchFileset(name="TestFilesetA", **couchArgs)
        collectionA.addFileset(filesetA)
        filesetB = CouchFileset(name="TestFilesetB", **couchArgs)
        collectionB.addFileset(filesetB)
        filesetC = CouchFileset(name="TestFilesetC", **couchArgs)
        collectionC.addFileset(filesetC)
        filesetD = CouchFileset(name="TestFilesetD", **couchArgs)
        collectionC.addFileset(filesetD)

        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        # Pause after each of the first three additions (1s, 1s, 2s).
        for populated, pause in ((filesetA, 1), (filesetB, 1), (filesetC, 2)):
            populated.add(sharedFiles)
            time.sleep(pause)
        filesetD.add(sharedFiles)
Example #16
0
    def populateCouchDB(self):
        """
        _populateCouchDB_

        Create ACDC records: four collections with one fileset each, and
        the same five random files added to every fileset with a one
        second pause between the additions. Returns the last collection.
        """
        couchArgs = {"database": self.testInit.couchDbName,
                     "url": self.testInit.couchUrl}

        collectionA = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionB = CouchCollection(name="Struckthunder", **couchArgs)
        collectionC = CouchCollection(name="Thunderstruck", **couchArgs)
        collectionD = CouchCollection(name="Thunderstruck", **couchArgs)

        # One fileset per collection, created and attached in order A to D.
        filesetA = CouchFileset(name="TestFilesetA", **couchArgs)
        collectionA.addFileset(filesetA)
        filesetB = CouchFileset(name="TestFilesetB", **couchArgs)
        collectionB.addFileset(filesetB)
        filesetC = CouchFileset(name="TestFilesetC", **couchArgs)
        collectionC.addFileset(filesetC)
        filesetD = CouchFileset(name="TestFilesetD", **couchArgs)
        collectionD.addFileset(filesetD)

        sharedFiles = [File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
                       for _ in range(5)]

        # One second pause after each of the first three additions.
        for populated in (filesetA, filesetB, filesetC):
            populated.add(sharedFiles)
            time.sleep(1)
        filesetD.add(sharedFiles)
        # Alan: unsure why to return this specific collection
        return collectionD
Example #17
0
    def failedJobs(self, failedJobs, useMask=True):
        """
        _failedJobs_

        Record the input files of every failed job in ACDC, using one
        collection named after the job's workflow and one fileset named
        after the job's task.

        Each job must provide the non-standard 'task' and 'workflow' keys;
        an ACDCDCSException is raised when either one is missing.  When
        useMask is true the job's 'mask' is handed to the fileset together
        with the files.
        """
        for failedJob in failedJobs:
            try:
                filesetName, collectionName = failedJob['task'], failedJob['workflow']
            except KeyError as ex:
                msg = "Missing required, non-standard key %s in job in ACDC.DataCollectionService" % str(ex)
                logging.error(msg)
                raise ACDCDCSException(msg)

            collection = CouchCollection(database=self.database, url=self.url,
                                         name=collectionName,
                                         type=CollectionTypes.DataCollection)
            taskFileset = CouchFileset(database=self.database, url=self.url,
                                       name=filesetName)
            collection.addFileset(taskFileset)

            jobFiles = failedJob['input_files']
            for fileInfo in jobFiles:
                # Only merged files that carry a "parents" entry are candidates.
                if not fileInfo["merged"] or "parents" not in fileInfo:
                    continue
                parents = fileInfo["parents"]
                # Drop the parentage (in place, on the job's own file info)
                # when the first parent yielded is an unmerged file.
                if len(parents) and "/store/unmerged/" in next(iter(parents)):
                    fileInfo["parents"] = []

            addArgs = {"files": jobFiles}
            if useMask:
                addArgs["mask"] = failedJob['mask']
            taskFileset.add(**addArgs)

        return
    def testCreatePopulateDrop(self):
        """
        _testCreatePopulateDrop_

        Test creating, populating and dropping a collection.

        Two collections are created and filled through three filesets, one
        collection is dropped, and fresh CouchCollection objects are then
        populated to check which filesets and files they report.
        """
        testCollectionA = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "Thunderstruck")
        testCollectionB = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "StruckThunder")
        testCollectionA.setOwner(self.owner)
        testCollectionB.setOwner(self.owner)
        testCollectionA.create()
        testCollectionB.create()

        # There should be nothing in couch.  Documents are only added for
        # filesets and files.

        testFilesA = []
        for _ in range(5):
            testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
            testFilesA.append(testFile)
        testFilesB = []
        for _ in range(10):
            testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
            testFilesB.append(testFile)

        testFilesetA = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = "TestFilesetA")
        testFilesetB = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = "TestFilesetB")
        testFilesetC = CouchFileset(database = self.testInit.couchDbName,
                                    url = self.testInit.couchUrl,
                                    name = "TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFilesA)
        testFilesetB.add(testFilesA)
        testFilesetC.add(testFilesA)
        testFilesetC.add(testFilesB)

        # Drop testCollectionA
        testCollectionA.drop()

        # Try to populate testFilesetA
        # NOTE(review): the name below is "ThunderStruck" while the dropped
        # collection was created as "Thunderstruck".  If names are compared
        # case-sensitively this looks up a collection that never existed and
        # the assertion does not exercise drop() -- confirm which is intended.
        testCollectionC = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "ThunderStruck")
        testCollectionC.setOwner(self.owner)
        testCollectionC.populate()

        self.assertEqual(len(testCollectionC["filesets"]), 0,
                         "Error: There should be no filesets in this collect.")

        # Try to populate testFilesetB
        testCollectionD = CouchCollection(database = self.testInit.couchDbName,
                                          url = self.testInit.couchUrl,
                                          name = "StruckThunder")
        testCollectionD.setOwner(self.owner)
        testCollectionD.populate()

        for fileset in testCollectionD["filesets"]:
            # Work on a copy: extending testFilesA itself would leak the
            # extra files into the expectations for every later fileset and
            # make the result depend on the iteration order.
            testFiles = list(testFilesA)
            if fileset["name"] == "TestFilesetC":
                testFiles.extend(testFilesB)

            self.assertEqual(len(testFiles), len(fileset.files.keys()),
                             "Error: Wrong number of files in fileset.")
            for testFile in testFiles:
                self.assertIn(testFile["lfn"], fileset.files.keys(),
                              "Error: File is missing.")
                self.assertEqual(testFile["events"],
                                 fileset.files[testFile["lfn"]]["events"],
                                 "Error: Wrong number of events.")
                self.assertEqual(testFile["size"],
                                 fileset.files[testFile["lfn"]]["size"],
                                 "Error: Wrong file size.")

        return
Example #19
0
    def testCreatePopulateDrop(self):
        """
        _testCreatePopulateDrop_

        Test creating, populating and dropping a collection.

        Two collections are created and filled through three filesets, one
        collection is dropped, and fresh CouchCollection objects are then
        populated to check which filesets and files they report.
        """
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionA.setOwner(self.owner)
        testCollectionB.setOwner(self.owner)
        testCollectionA.create()
        testCollectionB.create()

        # There should be nothing in couch.  Documents are only added for
        # filesets and files.

        testFilesA = []
        for _ in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesA.append(testFile)
        testFilesB = []
        for _ in range(10):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesB.append(testFile)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFilesA)
        testFilesetB.add(testFilesA)
        testFilesetC.add(testFilesA)
        testFilesetC.add(testFilesB)

        # Drop testCollectionA
        testCollectionA.drop()

        # Try to populate testFilesetA
        # NOTE(review): the name below is "ThunderStruck" while the dropped
        # collection was created as "Thunderstruck".  If names are compared
        # case-sensitively this looks up a collection that never existed and
        # the assertion does not exercise drop() -- confirm which is intended.
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="ThunderStruck")
        testCollectionC.setOwner(self.owner)
        testCollectionC.populate()

        self.assertEqual(
            len(testCollectionC["filesets"]), 0,
            "Error: There should be no filesets in this collect.")

        # Try to populate testFilesetB
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionD.setOwner(self.owner)
        testCollectionD.populate()

        for fileset in testCollectionD["filesets"]:
            # Work on a copy: extending testFilesA itself would leak the
            # extra files into the expectations for every later fileset and
            # make the result depend on the iteration order.
            testFiles = list(testFilesA)
            if fileset["name"] == "TestFilesetC":
                testFiles.extend(testFilesB)

            self.assertEqual(len(testFiles), len(fileset.files.keys()),
                             "Error: Wrong number of files in fileset.")
            for testFile in testFiles:
                self.assertIn(testFile["lfn"], fileset.files.keys(),
                              "Error: File is missing.")
                self.assertEqual(testFile["events"],
                                 fileset.files[testFile["lfn"]]["events"],
                                 "Error: Wrong number of events.")
                self.assertEqual(testFile["size"],
                                 fileset.files[testFile["lfn"]]["size"],
                                 "Error: Wrong file size.")

        return