Example #1
    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           rand=False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        parentFile = File('%s_parent' % baseName,
                          size=1000,
                          events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn='%s_%i' % (baseName, i),
                           size=1000,
                           events=100,
                           locations="T1_US_FNAL_Disk")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn='%s_%i_2' % (baseName, i),
                               size=1000,
                               events=100,
                               locations="T2_CH_CERN")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
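For reference, the per-file lumi assignment in createSubscription (used again in Example #5) reduces to the following standalone sketch; buildLumis is a hypothetical helper name, not part of WMCore:

import random

def buildLumis(fileIndex, lumisPerFile, rand=False):
    # Mirrors the lumi-assignment loop in createSubscription above.
    if rand:
        # Random lumis drawn from a per-file window of width 1000.
        return [random.randint(1000 * fileIndex, 1000 * (fileIndex + 1))
                for _ in range(lumisPerFile)]
    # Sequential lumis, offset by 100 per file index.
    return [(100 * fileIndex) + lumi for lumi in range(lumisPerFile)]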
Example #2
    def testLotsOfAncestors(self):
        """
        _testLotsOfAncestors_

        Create a file with 15 parents, each of which has 100 parents, to
        verify that the query that returns grandparents works correctly.
        """
        raise nose.SkipTest
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10,
                        checksums = {"cksum": "1"}, locations = "se1.fnal.gov")
        testFileA.create()

        for i in xrange(15):
            testParent = File(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = {"cksum": "1"}, locations = "se1.fnal.gov")
            testParent.create()
            testFileA.addParent(testParent["lfn"])

            for _ in xrange(100):
                testGParent = File(lfn = makeUUID(), size = 1024, events = 10,
                                   checksums = {"cksum": "1"}, locations = "se1.fnal.gov")
                testGParent.create()
                testParent.addParent(testGParent["lfn"])

        assert len(testFileA.getAncestors(level = 2, type = "lfn")) == 1500, \
               "ERROR: Incorrect grandparents returned"
        
        return
Example #3
    def testGetInfo(self):
        """
        _testGetInfo_

        Test the getInfo() method of the File class to make sure that it
        returns the correct information.
        """
        testFileParent = File(lfn = "/this/is/a/parent/lfn", size = 1024,
                              events = 20, checksums={'cksum': 1111})
        testFileParent.addRun(Run(1, *[45]))
        testFileParent.create()

        testFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums={'cksum': 222})
        testFile.addRun(Run(1, *[45]))
        testFile.addRun(Run(2, *[46, 47]))
        testFile.addRun(Run(2, *[47, 48]))
        testFile.create()
        testFile.setLocation(se = "se1.fnal.gov", immediateSave = False)
        testFile.setLocation(se = "se1.cern.ch", immediateSave = False)
        testFile.addParent("/this/is/a/parent/lfn")

        info = testFile.getInfo()

        assert info[0] == testFile["lfn"], \
               "ERROR: File returned wrong LFN"
        
        assert info[1] == testFile["id"], \
               "ERROR: File returned wrong ID"
        
        assert info[2] == testFile["size"], \
               "ERROR: File returned wrong size"
        
        assert info[3] == testFile["events"], \
               "ERROR: File returned wrong events"
        
        assert info[4] == testFile["checksums"], \
               "ERROR: File returned wrong cksum"
        
        assert len(info[5]) == 2, \
               "ERROR: File returned wrong runs"
        
        assert info[5] == [Run(1, *[45]), Run(2, *[46, 47, 48])], \
               "Error: Run hasn't been combined correctly"
               
        assert len(info[6]) == 2, \
               "ERROR: File returned wrong locations"

        for testLocation in info[6]:
            assert testLocation in ["se1.fnal.gov", "se1.cern.ch"], \
                   "ERROR: File returned wrong locations"

        assert len(info[7]) == 1, \
               "ERROR: File returned wrong parents"

        assert info[7][0] == testFileParent, \
               "ERROR: File returned wrong parents"

        testFile.delete()
        testFileParent.delete()
        return
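The run-combining behaviour asserted above (the two Run(2, ...) additions collapsing into a single run with merged lumis) can be sketched independently of WMBS; combineRuns is an illustrative name, not the actual File implementation:

def combineRuns(runs):
    # runs: iterable of (runNumber, lumiList) pairs; lumis for the same
    # run number are merged and de-duplicated, as getInfo() is expected
    # to report them.
    merged = {}
    for runNumber, lumis in runs:
        merged.setdefault(runNumber, set()).update(lumis)
    return [(run, sorted(lumis)) for run, lumis in sorted(merged.items())]

# combineRuns([(1, [45]), (2, [46, 47]), (2, [47, 48])])
# -> [(1, [45]), (2, [46, 47, 48])]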
Example #4
    def testGetParentLFNs(self):
        """
        _testGetParentLFNs_

        Create three files and set them to be parents of a fourth file.  Check
        to make sure that getParentLFNs() on the child file returns the correct
        LFNs.
        """
        testFileParentA = File(lfn="/this/is/a/parent/lfnA",
                               size=1024,
                               events=20,
                               checksums={'cksum': 1})
        testFileParentA.addRun(Run(1, *[45]))
        testFileParentB = File(lfn="/this/is/a/parent/lfnB",
                               size=1024,
                               events=20,
                               checksums={'cksum': 2})
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentC = File(lfn="/this/is/a/parent/lfnC",
                               size=1024,
                               events=20,
                               checksums={'cksum': 3})
        testFileParentC.addRun(Run(1, *[45]))

        testFileParentA.create()
        testFileParentB.create()
        testFileParentC.create()

        testFile = File(lfn="/this/is/a/lfn",
                        size=1024,
                        events=10,
                        checksums={'cksum': 1})
        testFile.addRun(Run(1, *[45]))
        testFile.create()

        testFile.addParent(testFileParentA["lfn"])
        testFile.addParent(testFileParentB["lfn"])
        testFile.addParent(testFileParentC["lfn"])

        parentLFNs = testFile.getParentLFNs()

        assert len(parentLFNs) == 3, \
               "ERROR: Child does not have the right amount of parents"

        goldenLFNs = [
            "/this/is/a/parent/lfnA", "/this/is/a/parent/lfnB",
            "/this/is/a/parent/lfnC"
        ]
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                   "ERROR: Unknown parent lfn"
            goldenLFNs.remove(parentLFN)

        testFile.delete()
        testFileParentA.delete()
        testFileParentB.delete()
        testFileParentC.delete()
        return
Example #5
    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        testFileset.create()
        parentFile = File('%s_parent' % (baseName), size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000,
                               events = 100, locations = "otherse.cern.ch")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()


        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "LumiBased",
                                        type = "Processing")
        testSubscription.create()

        return testSubscription
Example #6
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s2", seName="somese3.cern.ch")

        fileSite2 = File(lfn="fileSite2",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["somese3.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 2, \
               "ERROR: Two jobs should have been returned."

        for job in result[0].jobs:
            firstInputFile = job.getFiles()[0]
            baseLocation = list(firstInputFile["locations"])[0]

            for inputFile in job.getFiles():
                assert inputFile["locations"] == set(["somese.cern.ch", "somese2.cern.ch"]) or \
                       inputFile["locations"] == set(["somese3.cern.ch"]), \
                       "Error: Wrong number of locations"

                assert list(inputFile["locations"])[0] == baseLocation, \
                       "Error: Wrong location."

        return
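The invariant this test checks, namely that no merge job mixes files from different SEs, amounts to bucketing candidate files by their location set before any size-based grouping happens. A minimal sketch under that assumption (groupByLocation is a hypothetical name, not the WMBSMergeBySize internals):

def groupByLocation(files):
    # Bucket files on their (frozen) location set; merge candidates are
    # then only drawn from within a single bucket.
    groups = {}
    for f in files:
        key = frozenset(f["locations"])
        groups.setdefault(key, []).append(f)
    return groups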
Example #7
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileSite2",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 2, \
               "ERROR: Two jobs should have been returned."

        ralJobs = 0
        fnalcernJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL", "T2_CH_CERN"]):
                fnalcernJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalcernJobs, 1)

        return
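The expected possiblePSN values follow from the site-to-PNN mappings the fixtures register: stuffWMBS (Example #17) maps both T1_US_FNAL and T2_CH_CERN to the PNN "T2_CH_CERN", while this test adds T1_UK_RAL against "T1_UK_RAL_Disk". A sketch of that resolution (illustrative only, not the WMBS lookup):

pnnToPsns = {
    "T2_CH_CERN": {"T1_US_FNAL", "T2_CH_CERN"},
    "T1_UK_RAL_Disk": {"T1_UK_RAL"},
}

def possiblePSN(fileLocations):
    # Union of the processing site names that mount any of the file's PNNs.
    psns = set()
    for pnn in fileLocations:
        psns |= pnnToPsns.get(pnn, set())
    return psns

# possiblePSN({"T1_UK_RAL_Disk"}) -> {"T1_UK_RAL"}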
Example #8
    def testGetParentLFNs(self):
        """
        _testGetParentLFNs_

        Create three files and set them to be parents of a fourth file.  Check
        to make sure that getParentLFNs() on the child file returns the correct
        LFNs.
        """
        testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024,
                               events = 20, checksums = {'cksum': 1})
        testFileParentA.addRun(Run(1, *[45]))
        testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024,
                               events = 20, checksums = {'cksum': 2})
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentC = File(lfn = "/this/is/a/parent/lfnC", size = 1024,
                               events = 20, checksums = {'cksum': 3})
        testFileParentC.addRun(Run(1, *[45]))

        testFileParentA.create()
        testFileParentB.create()
        testFileParentC.create()

        testFile = File(lfn = "/this/is/a/lfn", size = 1024,
                        events = 10, checksums = {'cksum': 1})
        testFile.addRun(Run(1, *[45]))
        testFile.create()

        testFile.addParent(testFileParentA["lfn"])
        testFile.addParent(testFileParentB["lfn"])
        testFile.addParent(testFileParentC["lfn"])

        parentLFNs = testFile.getParentLFNs()
        
        assert len(parentLFNs) == 3, \
               "ERROR: Child does not have the right amount of parents"

        goldenLFNs = ["/this/is/a/parent/lfnA",
                      "/this/is/a/parent/lfnB",
                      "/this/is/a/parent/lfnC"]
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                   "ERROR: Unknown parent lfn"
            goldenLFNs.remove(parentLFN)
                   
        testFile.delete()
        testFileParentA.delete()
        testFileParentB.delete()
        testFileParentC.delete()
        return
Example #9
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s2", seName = "somese3.cern.ch")

        fileSite2 = File(lfn = "fileSite2", size = 4098, events = 1024,
                         first_event = 0, locations = set(["somese3.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)

        result = jobFactory(min_merge_size = 4097, max_merge_size = 99999999,
                            max_merge_events = 999999999)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 2, \
               "ERROR: Two jobs should have been returned."

        for job in result[0].jobs:
            firstInputFile = job.getFiles()[0]
            baseLocation = list(firstInputFile["locations"])[0]

            for inputFile in job.getFiles():
                assert inputFile["locations"] == set(["somese.cern.ch", "somese2.cern.ch"]) or \
                       inputFile["locations"] == set(["somese3.cern.ch"]), \
                       "Error: Wrong number of locations"

                assert list(inputFile["locations"])[0] == baseLocation, \
                       "Error: Wrong location."

        return
Example #10
    def testLoadData(self):
        """
        _testLoadData_

        Test the loading of all data from a file, including run/lumi
        associations, location information and parentage information.
        """
        testFileParentA = File(lfn="/this/is/a/parent/lfnA",
                               size=1024,
                               events=20,
                               checksums={'cksum': 1})
        testFileParentA.addRun(Run(1, *[45]))
        testFileParentB = File(lfn="/this/is/a/parent/lfnB",
                               size=1024,
                               events=20,
                               checksums={'cksum': 1})
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = File(lfn="/this/is/a/lfn",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()
        testFileA.setLocation(se="se1.fnal.gov", immediateSave=False)
        testFileA.setLocation(se="se1.cern.ch", immediateSave=False)
        testFileA.addParent("/this/is/a/parent/lfnA")
        testFileA.addParent("/this/is/a/parent/lfnB")
        testFileA.updateLocations()

        testFileB = File(lfn=testFileA["lfn"])
        testFileB.loadData(parentage=1)
        testFileC = File(id=testFileA["id"])
        testFileC.loadData(parentage=1)

        assert testFileA == testFileB, \
               "ERROR: File load by LFN didn't work"

        assert testFileA == testFileC, \
               "ERROR: File load by ID didn't work"

        testFileA.delete()
        testFileParentA.delete()
        testFileParentB.delete()
        return
Example #11
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileSite2", size=4098, events=1024,
                         first_event=0, locations={"T1_UK_RAL_Disk"})
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                            max_merge_events=999999999)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 2, \
            "ERROR: Two jobs should have been returned."

        ralJobs = 0
        fnalcernJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == {"T1_UK_RAL"}:
                ralJobs += 1
            elif job["possiblePSN"] == {"T1_US_FNAL", "T2_CH_CERN"}:
                fnalcernJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalcernJobs, 1)

        return
Example #12
    def testCursor(self):
        """
        _testCursor_

        test that cursor closing really takes effect

        Create 100 files, each with 5 parents, and loop over them 100 times.
        If the cursors are exhausted this will crash.

        TODO: improve for more effective testing.

        """

        raise nose.SkipTest
        fileList = []
        parentFile = None
        for i in range(100):
            testFile = File(lfn="/this/is/a/lfn%s" % i,
                            size=1024,
                            events=10,
                            checksums={"cksum": "1"})
            testFile.addRun(Run(1, *[i]))
            testFile.create()

            for j in range(5):
                parentFile = File(lfn="/this/is/a/lfnP%s" % j,
                                  size=1024,
                                  events=10,
                                  checksums={"cksum": "1"})
                parentFile.addRun(Run(1, *[j]))
                parentFile.create()
                testFile.addParent(parentFile['lfn'])

            fileList.append(testFile)

        for i in range(100):
            for wmbsFile in fileList:
                wmbsFile.loadData()
                wmbsFile.getAncestors(level=2)
                wmbsFile.getAncestors(level=2, type="lfn")

        return
Example #13
    def testGetAncestorLFNs(self):
        """
        _testGetAncestorLFNs_

        Create a series of files that have several generations of parentage
        information.  Verify that the parentage information is reported
        correctly.
        """
        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10, checksums={"cksum": 1}, locations="se1.fnal.gov")
        testFileA.create()

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10, checksums={"cksum": 1}, locations="se1.fnal.gov")
        testFileB.create()

        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10, checksums={"cksum": 1}, locations="se1.fnal.gov")
        testFileC.create()

        testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10, checksums={"cksum": 1}, locations="se1.fnal.gov")
        testFileD.create()

        testFileE = File(lfn="/this/is/a/lfnE", size=1024, events=10, checksums={"cksum": 1}, locations="se1.fnal.gov")
        testFileE.create()

        testFileE = File(lfn="/this/is/a/lfnF", size=1024, events=10, checksums={"cksum": 1}, locations="se1.fnal.gov")
        testFileE.create()

        testFileA.addParent(lfn="/this/is/a/lfnB")
        testFileA.addParent(lfn="/this/is/a/lfnC")
        testFileB.addParent(lfn="/this/is/a/lfnD")
        testFileC.addParent(lfn="/this/is/a/lfnD")
        testFileD.addParent(lfn="/this/is/a/lfnE")
        testFileD.addParent(lfn="/this/is/a/lfnF")

        level1 = ["/this/is/a/lfnB", "/this/is/a/lfnC"]
        level2 = ["/this/is/a/lfnD"]
        level3 = ["/this/is/a/lfnE", "/this/is/a/lfnF"]
        level4 = level5 = []

        decs2 = ["/this/is/a/lfnA"]

        assert testFileA.getAncestors(level=1, type="lfn") == level1, "ERROR: level 1 test failed"
        assert testFileA.getAncestors(level=2, type="lfn") == level2, "ERROR: level 2 test failed"
        assert testFileA.getAncestors(level=3, type="lfn") == level3, "ERROR: level 3 test failed"
        assert testFileA.getAncestors(level=4, type="lfn") == level4, "ERROR: level 4 test failed"
        assert testFileA.getAncestors(level=5, type="lfn") == level5, "ERROR: level 5 test failed"

        assert testFileD.getDescendants(level=1, type="lfn") == level1, "ERROR: level 1 desc test failed"
        assert testFileD.getDescendants(level=2, type="lfn") == decs2, "ERROR: level 2 desc test failed"
        assert testFileD.getDescendants(level=3, type="lfn") == level4, "ERROR: level 3 desc test failed"

        return
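The level semantics asserted above can be reproduced with a plain generational walk over the parentage map this test builds (a sketch, not the WMBS DAO query behind getAncestors):

parents = {
    "/this/is/a/lfnA": ["/this/is/a/lfnB", "/this/is/a/lfnC"],
    "/this/is/a/lfnB": ["/this/is/a/lfnD"],
    "/this/is/a/lfnC": ["/this/is/a/lfnD"],
    "/this/is/a/lfnD": ["/this/is/a/lfnE", "/this/is/a/lfnF"],
}

def ancestorsAtLevel(lfn, level):
    # Walk 'level' generations up the parentage graph, de-duplicating as
    # we go (lfnB and lfnC share the parent lfnD).
    current = {lfn}
    for _ in range(level):
        current = {p for child in current for p in parents.get(child, [])}
    return sorted(current)

# ancestorsAtLevel("/this/is/a/lfnA", 2) -> ["/this/is/a/lfnD"]
# ancestorsAtLevel("/this/is/a/lfnA", 4) -> []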
Example #14
    def testLoadData(self):
        """
        _testLoadData_

        Test the loading of all data from a file, including run/lumi
        associations, location information and parentage information.
        """
        testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024,
                              events = 20, checksums = {'cksum': 1})
        testFileParentA.addRun(Run(1, *[45]))
        testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024,
                              events = 20, checksums = {'cksum': 1})
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                        checksums = {'cksum':1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()
        testFileA.setLocation(se = "se1.fnal.gov", immediateSave = False)
        testFileA.setLocation(se = "se1.cern.ch", immediateSave = False)
        testFileA.addParent("/this/is/a/parent/lfnA")
        testFileA.addParent("/this/is/a/parent/lfnB")
        testFileA.updateLocations()
                                                        
        testFileB = File(lfn = testFileA["lfn"])
        testFileB.loadData(parentage = 1)
        testFileC = File(id = testFileA["id"])
        testFileC.loadData(parentage = 1)

        assert testFileA == testFileB, \
               "ERROR: File load by LFN didn't work"

        assert testFileA == testFileC, \
               "ERROR: File load by ID didn't work"

        testFileA.delete()
        testFileParentA.delete()
        testFileParentB.delete()
        return    
Example #15
    def testCursor(self):
        """
        _testCursor_
        
        test that cursor closing really takes effect

        Create 100 files, each with 5 parents, and loop over them 100 times.
        If the cursors are exhausted this will crash.

        TODO: improve for more effective testing.

        """
        
        raise nose.SkipTest
        fileList = []
        parentFile = None
        for i in range(100):
            testFile = File(lfn = "/this/is/a/lfn%s" % i, size = 1024, events = 10,
                            checksums = {"cksum": "1"})
            testFile.addRun(Run(1, *[i]))
            testFile.create()
            
            for j in range(5):
                parentFile = File(lfn = "/this/is/a/lfnP%s" % j, size = 1024,
                                  events = 10, checksums = {"cksum": "1"})
                parentFile.addRun(Run(1, *[j]))
                parentFile.create()
                testFile.addParent(parentFile['lfn'])
    
            fileList.append(testFile)
            
        for i in range(100):
            for wmbsFile in fileList:
                wmbsFile.loadData()
                wmbsFile.getAncestors(level = 2)
                wmbsFile.getAncestors(level = 2, type = "lfn")
        
        return
Example #16
    def testLotsOfAncestors(self):
        """
        _testLotsOfAncestors_

        Create a file with 15 parents, each of which has 100 parents, to
        verify that the query that returns grandparents works correctly.
        """
        raise nose.SkipTest
        testFileA = File(lfn="/this/is/a/lfnA",
                         size=1024,
                         events=10,
                         checksums={"cksum": "1"},
                         locations="se1.fnal.gov")
        testFileA.create()

        for i in xrange(15):
            testParent = File(lfn=makeUUID(),
                              size=1024,
                              events=10,
                              checksums={"cksum": "1"},
                              locations="se1.fnal.gov")
            testParent.create()
            testFileA.addParent(testParent["lfn"])

            for _ in xrange(100):
                testGParent = File(lfn=makeUUID(),
                                   size=1024,
                                   events=10,
                                   checksums={"cksum": "1"},
                                   locations="se1.fnal.gov")
                testGParent.create()
                testParent.addParent(testGParent["lfn"])

        assert len(testFileA.getAncestors(level=2, type="lfn")) == 1500, \
            "ERROR: Incorrect grandparents returned"

        return
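A quick check on the magic number in the assertion: each of the 15 parents contributes 100 grandparents, and every LFN is distinct because it comes from makeUUID(), so the level-2 query should return exactly:

# 15 parents * 100 grandparents each, all LFNs unique via makeUUID()
assert 15 * 100 == 1500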
Example #17
    def stuffWMBS(self, injected=True):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group is
        also injected.  Also files are added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
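        # Note: both processing site names are registered against the same
        # PNN below, so jobs whose files sit at "T2_CH_CERN" can run at
        # either site (possiblePSN == {"T1_US_FNAL", "T2_CH_CERN"}, as the
        # assertions in Example #7 expect).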
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                                 owner="Steve", task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                      owner="Steve", task="Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset=inputFileset,
                                              workflow=bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn="parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription=bogusInputSubscription)
        jobGroup3.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob1A = Job()
        testJob1A.addFile(parentFile1)
        testJob1A.create(jobGroup3)
        testJob1A["state"] = "cleanout"
        testJob1A["oldstate"] = "new"
        testJob1A["couch_record"] = "somejive"
        testJob1A["retry_count"] = 0
        testJob1A["outcome"] = "failure"
        testJob1A.save()
        changeStateDAO.execute([testJob1A])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        testJob7 = Job()
        testJob7.addFile(self.parentFileSite2)
        testJob7.create(jobGroup2)
        testJob7["state"] = "cleanout"
        testJob7["oldstate"] = "new"
        testJob7["couch_record"] = "somejive"
        testJob7["retry_count"] = 0
        testJob7["outcome"] = "success"
        testJob7.save()
        changeStateDAO.execute([testJob7])

        badFile1 = File(lfn="badFile1", size=10241024, events=10241024,
                        first_event=0, locations={"T2_CH_CERN"})
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn="file1", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn="file2", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn="file3", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn="file4", size=1024, events=1024,
                     first_event=3072, locations={"T2_CH_CERN"})
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn="fileA", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn="fileC", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn="fileI", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn="fileII", size=1024, events=1024,
                      first_event=1024, locations={"T2_CH_CERN"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn="fileIII", size=1024, events=1024,
                       first_event=2048, locations={"T2_CH_CERN"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn="fileIV", size=1024, events=1024,
                      first_event=3072, locations={"T2_CH_CERN"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn="badFileA", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn="badFileB", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn="badFileC", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI,
                        fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]:
            self.mergeFileset.addFile(fileObj)
            self.bogusFileset.addFile(fileObj)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return
Example #18
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)
        
        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", seName = "somese.cern.ch")
        locationAction.execute(siteName = "site2", seName = "otherse.cern.ch")
        
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/', size = 1000, events = 100, 
                          locations = set(["somese.cern.ch"])) 
        parentFile.create() 
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45]))
            newFile.create()
            newFile.addParent(lfn = parentFile['lfn']) 
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name = "TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()


        self.multipleSiteFileset = Fileset(name = "TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["otherse.cern.ch", "somese.cern.ch"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test" )
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "FileBased",
                                                     type = "Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "FileBased",
                                                   type = "Processing")
        self.singleFileSubscription.create()

        self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "FileBased",
                                                     type = "Processing")
        self.multipleSiteSubscription.create()
        return
Example #19
    def testGetAncestorLFNs(self):
        """
        _testGetAncestorLFNs_

        Create a series of files that have several generations of parentage
        information.  Verify that the parentage information is reported
        correctly.
        """
        testFileA = File(lfn="/this/is/a/lfnA",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1},
                         locations="se1.fnal.gov")
        testFileA.create()

        testFileB = File(lfn="/this/is/a/lfnB",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1},
                         locations="se1.fnal.gov")
        testFileB.create()

        testFileC = File(lfn="/this/is/a/lfnC",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1},
                         locations="se1.fnal.gov")
        testFileC.create()

        testFileD = File(lfn="/this/is/a/lfnD",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1},
                         locations="se1.fnal.gov")
        testFileD.create()

        testFileE = File(lfn="/this/is/a/lfnE",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1},
                         locations="se1.fnal.gov")
        testFileE.create()

        testFileE = File(lfn="/this/is/a/lfnF",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1},
                         locations="se1.fnal.gov")
        testFileE.create()

        testFileA.addParent(lfn="/this/is/a/lfnB")
        testFileA.addParent(lfn="/this/is/a/lfnC")
        testFileB.addParent(lfn="/this/is/a/lfnD")
        testFileC.addParent(lfn="/this/is/a/lfnD")
        testFileD.addParent(lfn="/this/is/a/lfnE")
        testFileD.addParent(lfn="/this/is/a/lfnF")

        level1 = ["/this/is/a/lfnB", "/this/is/a/lfnC"]
        level2 = ["/this/is/a/lfnD"]
        level3 = ["/this/is/a/lfnE", "/this/is/a/lfnF"]
        level4 = level5 = []

        decs2 = ["/this/is/a/lfnA"]

        assert testFileA.getAncestors(level=1, type='lfn') == level1, \
              "ERROR: level 1 test failed"
        assert testFileA.getAncestors(level=2, type='lfn') == level2, \
              "ERROR: level 2 test failed"
        assert testFileA.getAncestors(level=3, type='lfn') == level3, \
              "ERROR: level 3 test failed"
        assert testFileA.getAncestors(level=4, type='lfn') == level4, \
              "ERROR: level 4 test failed"
        assert testFileA.getAncestors(level=5, type='lfn') == level5, \
              "ERROR: level 5 test failed"

        assert testFileD.getDescendants(level=1, type='lfn') == level1, \
              "ERROR: level 1 desc test failed"
        assert testFileD.getDescendants(level=2, type='lfn') == decs2, \
              "ERROR: level 2 desc test failed"
        assert testFileD.getDescendants(level=3, type='lfn') == level4, \
              "ERROR: level 3 desc test failed"

        return
Example #20
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/',
                          size=1000,
                          events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.addRun(Run(i, *[45]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription.create()

        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleSiteSubscription.create()

        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }
        return
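A hedged note on performanceParams above: in these splitting tests the dictionary is normally passed through to the job factory call so per-job estimates can be derived from event counts. A minimal sketch of that arithmetic; the helper name and the exact units (seconds per event, size per event) are assumptions here:

def estimateJob(events, timePerEvent=12, sizePerEvent=400):
    # Rough per-job estimates implied by the parameters above (sketch only).
    return {
        "wallTime": events * timePerEvent,    # assumed seconds
        "outputSize": events * sizePerEvent,  # assumed size units per event
    }

# estimateJob(1000) -> {'wallTime': 12000, 'outputSize': 400000}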
Example #21
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute(siteName = 's1', seName = "somese.cern.ch")
        locationAction.execute(siteName = 's2', seName = "otherse.cern.ch")

        self.multipleFileFileset = Fileset(name = "TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/', size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            newFile.addParent(lfn = parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name = "TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()


        self.multipleSiteFileset = Fileset(name = "TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation("somese.cern.ch")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation(["somese.cern.ch","otherse.cern.ch"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "EventBased",
                                                   type = "Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.multipleSiteSubscription.create()
        return
Example #22
    def populateWMBS(self):
        """
        _populateWMBS_

        Create files and subscriptions in WMBS
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute(siteName = 's1', seName = "somese.cern.ch")
        locationAction.execute(siteName = 's2', seName = "otherse.cern.ch")
        self.validLocations = ["somese.cern.ch", "otherse.cern.ch"]

        self.multipleFileFileset = Fileset(name = "TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/', size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for _ in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            newFile.addParent(lfn = parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name = "TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()


        self.multipleSiteFileset = Fileset(name = "TestFileset3")
        self.multipleSiteFileset.create()
        for _ in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation("somese.cern.ch")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for _ in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation(["somese.cern.ch", "otherse.cern.ch"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task = "Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "EventBased",
                                                   type = "Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.multipleSiteSubscription.create()

        return
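
Because populateWMBS records where every file lives, tests can verify the
location bookkeeping directly. A minimal sketch, assuming WMBS File objects
expose their locations as a set under the 'locations' key once loadData()
has been called:

    for wmbsFile in self.multipleSiteFileset.getFiles():
        wmbsFile.loadData()
        for location in wmbsFile['locations']:
            assert location in self.validLocations, \
                   "ERROR: File has unexpected location %s" % location
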
Example #23
0
    def __call__(self, filesets):
        """
        The algorithm itself
        """
        # Update run list
        self.getNewRuns()

        # Do per fileset work, abandon fileset processing on exception
        for fileset in filesets:
            ds = fileset.name
            try:
                # Do per run work
                watchCompleteFiles = []

                for watch in self.watchedRuns:
                    # Ensure watcher has dataset listed
                    watch.addDatasetOfInterest(ds)

                    # Query DBS to find all blocks for this run / dataset
                    (files, blocks, fileInfoMap) = \
                        self.dbsHelper.getFileInfo(watch.run, ds)

                    # Now determine all required parent blocks
                    parentBlocks = set()
                    if fileset.requireParents:
                        parentDs = self.dbsHelper.getParentDataset(ds)
                        parentBlocks = self.dbsHelper.getBlockInfo(watch.run,
                                                                       parentDs)

                    # Final list of all required blocks
                    allBlocks = set(blocks)  # a set, so parentBlocks can be merged in below
                    allBlocks.update(parentBlocks)

                    # Find all sites where all required blocks (including
                    # any parent blocks) are complete
                    sites = self.phedexHelper.getCompleteSites(allBlocks)

                    # Get sites with newly completed transfers
                    newSites = watch.getNewSites(ds, sites)

                    if len(newSites) > 0:
                        # Add the files for these blocks to the fileset
                        for file in fileInfoMap:
                            fi = fileInfoMap[file]

                            # First add parent file
                            if fileset.requireParents:
                                parentFile = File(lfn=fi["file.parent"])
                                parentFile.save()
                                parentFile.setLocation(newSites)

                            # Add actual file
                            fileToAdd = File(lfn=file, size=fi["file.size"],
                                             events=fi["file.events"],
                                             run=watch.run,
                                             lumi=fi["file.lumi"])
                            if not fileToAdd.exists() and fileset.requireParents:
                                fileToAdd.addParent(fi["file.parent"])

                            # Add new locations but don't persist immediately
                            fileToAdd.setLocations(newSites, immediateSave=False)

                            # Add the file to the new file list
                            fileset.addFile(fileToAdd)

                    # Add the site info to the watcher list
                    watchCompleteFiles.append([watch, ds, newSites])

                # Commit the fileset
                fileset.commit()

                # Add the watched runs
                for a in watchCompleteFiles:
                    a[0].addCompletedNodes(a[1], a[2])

            except Exception:
                # Reset the watch list so we re-evaluate next call
                watchCompleteFiles = []

        # Purge old runs
        self.purgeWatchedRuns()
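
The feeder above leans on a per-run watcher object for its bookkeeping. The
class below is a hypothetical sketch of the interface the loop assumes
(addDatasetOfInterest, getNewSites and addCompletedNodes are the only calls
made); the real implementation lives elsewhere in the component:

    class WatchedRun(object):
        """Hypothetical sketch of the per-run watcher used above."""
        def __init__(self, run):
            self.run = run
            self.completedNodes = {}  # dataset -> sites already reported

        def addDatasetOfInterest(self, ds):
            self.completedNodes.setdefault(ds, set())

        def getNewSites(self, ds, sites):
            # Report only sites that completed since the last call
            return set(sites) - self.completedNodes.get(ds, set())

        def addCompletedNodes(self, ds, sites):
            self.completedNodes.setdefault(ds, set()).update(sites)
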
Example #24
0
    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.
        """
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s1", seName = "somese.cern.ch")

        mergeFilesetA = Fileset(name = "mergeFilesetA")
        mergeFilesetB = Fileset(name = "mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name = "mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name = "mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name = "mergeWorkflow", spec = "bogus",
                                 owner = "Steve", task = "Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset = mergeFilesetA,
                                          workflow = mergeWorkflow,
                                          split_algo = "WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset = mergeFilesetB,
                                          workflow = mergeWorkflow,
                                          split_algo = "WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name = "inputFileset")
        inputFileset.create()

        inputFileA = File(lfn = "inputLFNA")
        inputFileB = File(lfn = "inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name = "procWorkflowA", spec = "bunk2",
                                 owner = "Steve", task = "Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name = "procWorkflowB", spec = "bunk3",
                                 owner = "Steve", task = "Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset = inputFileset,
                                         workflow = procWorkflowA,
                                         split_algo = "EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset = inputFileset,
                                         workflow = procWorkflowB,
                                         split_algo = "EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription = procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription = procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")

        testJobA = Job()
        testJobA.addFile(inputFileA)
        testJobA.create(jobGroupA)
        testJobA["state"] = "cleanout"
        testJobA["oldstate"] = "new"
        testJobA["couch_record"] = "somejive"
        testJobA["retry_count"] = 0
        testJobA["outcome"] = "success"
        testJobA.save()

        testJobB = Job()
        testJobB.addFile(inputFileB)
        testJobB.create(jobGroupA)
        testJobB["state"] = "cleanout"
        testJobB["oldstate"] = "new"
        testJobB["couch_record"] = "somejive"
        testJobB["retry_count"] = 0
        testJobB["outcome"] = "success"
        testJobB.save()

        testJobC = Job()
        testJobC.addFile(inputFileA)
        testJobC.create(jobGroupB)
        testJobC["state"] = "cleanout"
        testJobC["oldstate"] = "new"
        testJobC["couch_record"] = "somejive"
        testJobC["retry_count"] = 0
        testJobC["outcome"] = "success"
        testJobC.save()

        testJobD = Job()
        testJobD.addFile(inputFileA)
        testJobD.create(jobGroupB)
        testJobD["state"] = "cleanout"
        testJobD["oldstate"] = "new"
        testJobD["couch_record"] = "somejive"
        testJobD["retry_count"] = 0
        testJobD["outcome"] = "failure"
        testJobD.save()

        testJobE = Job()
        testJobE.addFile(inputFileB)
        testJobE.create(jobGroupB)
        testJobE["state"] = "cleanout"
        testJobE["oldstate"] = "new"
        testJobE["couch_record"] = "somejive"
        testJobE["retry_count"] = 0
        testJobE["outcome"] = "success"
        testJobE.save()

        testJobF = Job()
        testJobF.addFile(inputFileB)
        testJobF.create(jobGroupB)
        testJobF["state"] = "cleanout"
        testJobF["oldstate"] = "new"
        testJobF["couch_record"] = "somejive"
        testJobF["retry_count"] = 0
        testJobF["outcome"] = "failure"
        testJobF.save()

        changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                                testJobE, testJobF])

        fileA = File(lfn = "fileA", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        fileA.addRun(Run(1, *[45]))
        fileA.create()
        fileA.addParent(inputFileA["lfn"])
        fileB = File(lfn = "fileB", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        fileB.addRun(Run(1, *[45]))
        fileB.create()
        fileB.addParent(inputFileB["lfn"])

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = File(lfn = "fileC", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        fileC.addRun(Run(1, *[45]))
        fileC.create()
        fileC.addParent(inputFileA["lfn"])
        fileD = File(lfn = "fileD", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        fileD.addRun(Run(1, *[45]))
        fileD.create()
        fileD.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = mergeSubscriptionB)

        result = jobFactory(min_merge_size = 1, max_merge_size = 20000,
                            max_merge_events = 7169)

        assert len(result) == 0, \
               "Error: No merge jobs should have been created."

        fileE = File(lfn = "fileE", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        fileE.addRun(Run(1, *[45]))
        fileE.create()
        fileE.addParent(inputFileA["lfn"])
        fileF = File(lfn = "fileF", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        fileF.addRun(Run(1, *[45]))
        fileF.create()
        fileF.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])

        result = jobFactory(min_merge_size = 1, max_merge_size = 20000,
                            max_merge_events = 7169)

        assert len(result) == 1, \
               "Error: One merge job should have been created: %s" % len(result)

        return
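
The thresholds in this test are easiest to read as arithmetic: mergeFilesetB
ends up holding four files (fileC, fileD, fileE, fileF) of 1024 events and
1024 bytes each, so a merge job covering all of them totals 4 * 1024 = 4096
events and 4096 bytes, comfortably inside max_merge_size = 20000 and
max_merge_events = 7169. The first jobFactory call still returns nothing
because testJobD and testJobF, which feed mergeFilesetB, are recorded as
failures; only after both are flipped to "success" does the algorithm
consider the input complete and emit the single expected merge job.
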
Example #25
0
    def createTestJobGroup(self, nJobs = 10, retry_count = 0, workloadPath = 'test'):
        """
        Creates a group of several jobs
        """

        


        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec = workloadPath, owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()
        
        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()
        
        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn = "/this/is/a/parent", size = 1024, events = 10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('malpaquet')

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn = "/this/is/a/parent")
        testFileB.addParent(lfn = "/this/is/a/parent")

        for i in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['group'] = 'BadGuys'
            testJob['user'] = '******'
            testJob['taskType'] = 'Merge'
            #testJob['fwjr'] = myReport
            testJobGroup.add(testJob)
            testJob.create(group = testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files = [testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()
        
        return testJobGroup
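
Callers usually follow this helper with a check on the file bookkeeping it
sets up. A minimal sketch, assuming the WMBS Subscription filesOfStatus()
accessor and that JobGroup keeps a reference to its subscription:

    testJobGroup = self.createTestJobGroup(nJobs = 5)

    acquired = testJobGroup.subscription.filesOfStatus("Acquired")
    assert len(acquired) == 2, \
           "ERROR: Both input files should be in the Acquired state"
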
Example #26
0
    def stuffWMBS(self, injected = True):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group is
        also injected.  Files are also added to the merge subscription's
        fileset as well as to the output filesets of their job groups.
        """
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s1", seName = "somese.cern.ch")

        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")

        self.mergeFileset = Fileset(name = "mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name = "bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name = "mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name = "bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name = "mergeWorkflow", spec = "bunk2",
                                 owner = "Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(classname = "Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names = [mergeWorkflow.name], injected = injected)

        self.mergeSubscription = Subscription(fileset = self.mergeFileset,
                                              workflow = mergeWorkflow,
                                              split_algo = "WMBSMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset = self.bogusFileset,
                                              workflow = mergeWorkflow,
                                              split_algo = "WMBSMergeBySize")
        # Persist it, matching the pattern used for mergeSubscription above
        self.bogusSubscription.create()

        inputFileset = Fileset(name = "inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name = "inputWorkflow", spec = "input",
                                owner = "Steve", task = "Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name = "bogusInputWorkflow", spec = "input",
                                owner = "Steve", task = "Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset = inputFileset,
                                        workflow = inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset = inputFileset,
                                              workflow = bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn = "parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn = "parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn = "parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn = "parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn = "parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription = inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription = inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription = bogusInputSubscription)
        jobGroup3.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob1A = Job()
        testJob1A.addFile(parentFile1)
        testJob1A.create(jobGroup3)
        testJob1A["state"] = "cleanout"
        testJob1A["oldstate"] = "new"
        testJob1A["couch_record"] = "somejive"
        testJob1A["retry_count"] = 0
        testJob1A["outcome"] = "failure"
        testJob1A.save()
        changeStateDAO.execute([testJob1A])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn = "parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        testJob7 = Job()
        testJob7.addFile(self.parentFileSite2)
        testJob7.create(jobGroup2)
        testJob7["state"] = "cleanout"
        testJob7["oldstate"] = "new"
        testJob7["couch_record"] = "somejive"
        testJob7["retry_count"] = 0
        testJob7["outcome"] = "success"
        testJob7.save()
        changeStateDAO.execute([testJob7])

        badFile1 = File(lfn = "badFile1", size = 10241024, events = 10241024,
                        first_event = 0, locations = set(["somese.cern.ch"]))
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn = "file1", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn = "file2", size = 1024, events = 1024,
                     first_event = 1024, locations = set(["somese.cern.ch"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn = "file3", size = 1024, events = 1024,
                     first_event = 2048, locations = set(["somese.cern.ch"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn = "file4", size = 1024, events = 1024,
                     first_event = 3072, locations = set(["somese.cern.ch"]))
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn = "fileA", size = 1024, events = 1024,
                     first_event = 0, locations = set(["somese.cern.ch"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn = "fileB", size = 1024, events = 1024,
                     first_event = 1024, locations = set(["somese.cern.ch"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn = "fileC", size = 1024, events = 1024,
                     first_event = 2048, locations = set(["somese.cern.ch"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn = "fileI", size = 1024, events = 1024,
                     first_event = 0, locations = set(["somese.cern.ch"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn = "fileII", size = 1024, events = 1024,
                      first_event = 1024, locations = set(["somese.cern.ch"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn = "fileIII", size = 1024, events = 1024,
                       first_event = 2048, locations = set(["somese.cern.ch"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn = "fileIV", size = 1024, events = 1024,
                      first_event = 3072, locations = set(["somese.cern.ch"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn = "badFileA", size = 1024, events = 1024,
                     first_event = 0, locations = set(["somese.cern.ch"]))
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn = "badFileB", size = 1024, events = 1024,
                     first_event = 1024, locations = set(["somese.cern.ch"]))
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn = "badFileC", size = 1024, events = 1024,
                     first_event = 2048, locations = set(["somese.cern.ch"]))
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for file in [file1, file2, file3, file4, fileA, fileB, fileC, fileI,
                     fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]:
            self.mergeFileset.addFile(file)
            self.bogusFileset.addFile(file)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return
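
A test built on this fixture then drives the WMBSMergeBySize splitter over
self.mergeSubscription and checks which of the injected files end up in
merge jobs. A minimal sketch, reusing the factory pattern from the parallel
processing tests in this collection (the threshold values are illustrative):

    splitter = SplitterFactory()
    jobFactory = splitter(package = "WMCore.WMBS",
                          subscription = self.mergeSubscription)

    # badFile1, produced from parentFile5 whose second processing attempt
    # failed, and everything in bogusFileset should be ignored.
    result = jobFactory(min_merge_size = 1, max_merge_size = 20000,
                        max_merge_events = 20000)
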
Example #27
0
    def __call__(self, filesets):
        """
        The algorithm itself
        """
        # Update run list
        self.getNewRuns()

        # Do per fileset work, abandon fileset processing on exception
        for fileset in filesets:
            ds = fileset.name
            try:
                # Do per run work
                watchCompleteFiles = []

                for watch in self.watchedRuns:
                    # Ensure watcher has dataset listed
                    watch.addDatasetOfInterest(ds)

                    # Query DBS to find all blocks for this run / dataset
                    (files, blocks, fileInfoMap) = \
                        self.dbsHelper.getFileInfo(watch.run, ds)

                    # Now determine all required parent blocks
                    parentBlocks = set()
                    if fileset.requireParents:
                        parentDs = self.dbsHelper.getParentDataset(ds)
                        parentBlocks = self.dbsHelper.getBlockInfo(
                            watch.run, parentDs)

                    # Final list of all required blocks
                    allBlocks = set(blocks)  # a set, so parentBlocks can be merged in below
                    allBlocks.update(parentBlocks)

                    # Find all sites where all required blocks (including
                    # any parent blocks) are complete
                    sites = self.phedexHelper.getCompleteSites(allBlocks)

                    # Get sites with newly completed transfers
                    newSites = watch.getNewSites(ds, sites)

                    if len(newSites) > 0:
                        # Add the files for these blocks to the fileset
                        for file in fileInfoMap:
                            fi = fileInfoMap[file]

                            # First add parent file
                            if fileset.requireParents:
                                parentFile = File(lfn=fi["file.parent"])
                                parentFile.save()
                                parentFile.setLocation(newSites)

                            # Add actual file
                            fileToAdd = File(lfn=file,
                                             size=fi["file.size"],
                                             events=fi["file.events"],
                                             run=watch.run,
                                             lumi=fi["file.lumi"])
                            if not fileToAdd.exists(
                            ) and fileset.requireParents:
                                fileToAdd.addParent(fi["file.parent"])

                            # Add new locations but don't persist immediately
                            fileToAdd.setLocations(newSites,
                                                   immediateSave=False)

                            # Add the file to the new file list
                            fileset.addFile(fileToAdd)

                    # Add the site info to the watcher list
                    watchCompleteFiles.append([watch, ds, newSites])

                # Commit the fileset
                fileset.commit()

                # Add the watched runs
                for a in watchCompleteFiles:
                    a[0].addCompletedNodes(a[1], a[2])

            except Exception:
                # Reset the watch list so we re-evaluate next call
                watchCompleteFiles = []

        # Purge old runs
        self.purgeWatchedRuns()
Example #28
0
    def populateWMBS(self):
        """
        _populateWMBS_

        Create files and subscriptions in WMBS
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName='s1', seName="somese.cern.ch")
        locationAction.execute(siteName='s2', seName="otherse.cern.ch")
        self.validLocations = ["somese.cern.ch", "otherse.cern.ch"]

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/',
                          size=1000,
                          events=100,
                          locations=set(["somese.cern.ch"]))
        parentFile.create()
        for _ in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("somese.cern.ch")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation(["somese.cern.ch", "otherse.cern.ch"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.multipleSiteSubscription.create()

        return
Example #29
0
    def createTestJobGroup(self, nJobs = 10, retry_count = 1,
                           workloadPath = 'test', fwjrPath = None,
                           workloadName = makeUUID()):
        """
        Creates a group of several jobs
        """


        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec = workloadPath, owner = "cmsdataops", group = "cmsdataops",
                                name = workloadName, task="/TestWorkload/ReReco")
        testWorkflow.create()
        
        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()
        
        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn = "/this/is/a/parent", size = 1024, events = 10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('malpaquet')

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10,
                         first_event = 88, last_event = 99)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10,
                         first_event = 88, last_event = 99)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('malpaquet')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn = "/this/is/a/parent")
        testFileB.addParent(lfn = "/this/is/a/parent")

        for i in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12312])
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12314, 12316])
            testJob['mask']['FirstEvent'] = 100
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group = testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        
        testJobGroup.commit()


        testSubscription.acquireFiles(files = [testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()
        
        return testJobGroup
Example #30
0
    def createTestJobGroup(self,
                           nJobs=10,
                           retry_count=1,
                           workloadPath='test',
                           fwjrPath=None,
                           workloadName=makeUUID(),
                           fileModifier=''):
        """
        Creates a group of several jobs
        """

        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec=workloadPath,
                                owner="cmsdataops",
                                group="cmsdataops",
                                name=workloadName,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier,
                         size=1024,
                         events=10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('T2_CH_CERN')

        testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier,
                         size=1024,
                         events=10,
                         first_event=88,
                         merged=False)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('T2_CH_CERN')

        testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier,
                         size=1024,
                         events=10,
                         first_event=88,
                         merged=False)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('T2_CH_CERN')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier)
        testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier)

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
            testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group=testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files=[testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup
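
The fileModifier argument exists so that repeated calls do not collide on
LFNs, which must be unique in WMBS. A sketch of the intended usage, also
passing a fresh workloadName since the makeUUID() default is evaluated only
once, at definition time:

    jobGroupA = self.createTestJobGroup(nJobs = 5)
    jobGroupB = self.createTestJobGroup(nJobs = 5,
                                        workloadName = makeUUID(),
                                        fileModifier = '_2')
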
Example #31
0
    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        mergeFilesetA = Fileset(name="mergeFilesetA")
        mergeFilesetB = Fileset(name="mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputFileA = File(lfn="inputLFNA")
        inputFileB = File(lfn="inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                                 owner="Steve", task="Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                                 owner="Steve", task="Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowA,
                                         split_algo="EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowB,
                                         split_algo="EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription=procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription=procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        testJobA = Job()
        testJobA.addFile(inputFileA)
        testJobA.create(jobGroupA)
        testJobA["state"] = "cleanout"
        testJobA["oldstate"] = "new"
        testJobA["couch_record"] = "somejive"
        testJobA["retry_count"] = 0
        testJobA["outcome"] = "success"
        testJobA.save()

        testJobB = Job()
        testJobB.addFile(inputFileB)
        testJobB.create(jobGroupA)
        testJobB["state"] = "cleanout"
        testJobB["oldstate"] = "new"
        testJobB["couch_record"] = "somejive"
        testJobB["retry_count"] = 0
        testJobB["outcome"] = "success"
        testJobB.save()

        testJobC = Job()
        testJobC.addFile(inputFileA)
        testJobC.create(jobGroupB)
        testJobC["state"] = "cleanout"
        testJobC["oldstate"] = "new"
        testJobC["couch_record"] = "somejive"
        testJobC["retry_count"] = 0
        testJobC["outcome"] = "success"
        testJobC.save()

        testJobD = Job()
        testJobD.addFile(inputFileA)
        testJobD.create(jobGroupB)
        testJobD["state"] = "cleanout"
        testJobD["oldstate"] = "new"
        testJobD["couch_record"] = "somejive"
        testJobD["retry_count"] = 0
        testJobD["outcome"] = "failure"
        testJobD.save()

        testJobE = Job()
        testJobE.addFile(inputFileB)
        testJobE.create(jobGroupB)
        testJobE["state"] = "cleanout"
        testJobE["oldstate"] = "new"
        testJobE["couch_record"] = "somejive"
        testJobE["retry_count"] = 0
        testJobE["outcome"] = "success"
        testJobE.save()

        testJobF = Job()
        testJobF.addFile(inputFileB)
        testJobF.create(jobGroupB)
        testJobF["state"] = "cleanout"
        testJobF["oldstate"] = "new"
        testJobF["couch_record"] = "somejive"
        testJobF["retry_count"] = 0
        testJobF["outcome"] = "failure"
        testJobF.save()

        changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                                testJobE, testJobF])

        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[45]))
        fileA.create()
        fileA.addParent(inputFileA["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[45]))
        fileB.create()
        fileB.addParent(inputFileB["lfn"])

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = File(lfn="fileC", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[45]))
        fileC.create()
        fileC.addParent(inputFileA["lfn"])
        fileD = File(lfn="fileD", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileD.addRun(Run(1, *[45]))
        fileD.create()
        fileD.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mergeSubscriptionB)

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 0, \
            "Error: No merge jobs should have been created."

        fileE = File(lfn="fileE", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileE.addRun(Run(1, *[45]))
        fileE.create()
        fileE.addParent(inputFileA["lfn"])
        fileF = File(lfn="fileF", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileF.addRun(Run(1, *[45]))
        fileF.create()
        fileF.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 1, \
            "Error: One merge job should have been created: %s" % len(result)

        return
Example #32
0
    def testGetInfo(self):
        """
        _testGetInfo_

        Test the getInfo() method of the File class to make sure that it
        returns the correct information.
        """
        testFileParent = File(lfn="/this/is/a/parent/lfn",
                              size=1024,
                              events=20,
                              checksums={'cksum': 1111})
        testFileParent.addRun(Run(1, *[45]))
        testFileParent.create()

        testFile = File(lfn="/this/is/a/lfn",
                        size=1024,
                        events=10,
                        checksums={'cksum': 222})
        testFile.addRun(Run(1, *[45]))
        testFile.addRun(Run(2, *[46, 47]))
        testFile.addRun(Run(2, *[47, 48]))
        testFile.create()
        testFile.setLocation(se="se1.fnal.gov", immediateSave=False)
        testFile.setLocation(se="se1.cern.ch", immediateSave=False)
        testFile.addParent("/this/is/a/parent/lfn")

        info = testFile.getInfo()

        assert info[0] == testFile["lfn"], \
               "ERROR: File returned wrong LFN"

        assert info[1] == testFile["id"], \
               "ERROR: File returned wrong ID"

        assert info[2] == testFile["size"], \
               "ERROR: File returned wrong size"

        assert info[3] == testFile["events"], \
               "ERROR: File returned wrong events"

        assert info[4] == testFile["checksums"], \
               "ERROR: File returned wrong cksum"

        assert len(info[5]) == 2, \
               "ERROR: File returned wrong runs"

        assert info[5] == [Run(1, *[45]), Run(2, *[46, 47, 48])], \
               "Error: Run hasn't been combined correctly"

        assert len(info[6]) == 2, \
               "ERROR: File returned wrong locations"

        for testLocation in info[6]:
            assert testLocation in ["se1.fnal.gov", "se1.cern.ch"], \
                   "ERROR: File returned wrong locations"

        assert len(info[7]) == 1, \
               "ERROR: File returned wrong parents"

        assert info[7][0] == testFileParent, \
               "ERROR: File returned wrong parents"

        testFile.delete()
        testFileParent.delete()
        return
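
The asserts above pin down the tuple layout, so callers can unpack getInfo()
positionally. A minimal sketch of the implied contract, assuming Run objects
expose their run number as the run attribute:

    (lfn, fileId, size, events, checksums,
     runs, locations, parents) = testFile.getInfo()

    # Runs with the same number are combined, so run 2 ends up carrying
    # lumis 46, 47 and 48 from the two addRun() calls above.
    assert sorted(run.run for run in runs) == [1, 2]
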