Beispiel #1
0
    def setUp(self):
        """
        _setUp_

        Build the fixture used by every test: initialise logging, the
        database connection and the schema through TestInit, set up the three
        couch databases, create the WMBS and BossAir DAO factories, generate
        a work directory, register the 'JobSubmitter' heartbeat and keep the
        result of EmulatorSetup.setupWMAgentConfig() as self.configFile.
        """
        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        init.setSchema(customModules=["WMCore.WMBS",
                                      "WMCore.BossAir",
                                      "WMCore.ResourceControl",
                                      "WMCore.Agent.Database"])
        for couchName, couchApp in (("jobsubmitter_t/jobs", "JobDump"),
                                    ("jobsubmitter_t/fwjrs", "FWJRDump"),
                                    ("wmagent_summary_t", "WMStats")):
            init.setupCouch(couchName, couchApp)

        # Both DAO factories share the logger and DB interface that are
        # attached to the current thread object.
        thread = threading.currentThread()
        daoArgs = {"logger": thread.logger, "dbinterface": thread.dbi}
        self.daoFactory = DAOFactory(package="WMCore.WMBS", **daoArgs)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir", **daoArgs)

        self.testDir = init.generateWorkDir()

        # Register the heartbeat for the component under test
        self.componentName = 'JobSubmitter'
        heartbeat = HeartbeatAPI(self.componentName)
        self.heartbeatAPI = heartbeat
        heartbeat.registerComponent()

        self.configFile = EmulatorSetup.setupWMAgentConfig()
Beispiel #2
0
    def setUp(self):
        """
        _setUp_

        Runs the parent setUp, then prepares logging, the database connection
        and the schema, sets up the jobs/fwjrs couch databases, and inserts
        every entry of self.sites both as a WMBS location and as a
        ResourceControl site with a 'Processing' threshold.  Finally a work
        directory is generated and the 'JobCreator' heartbeat is registered.
        """
        super(JobCreatorTest, self).setUp()

        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        init.setSchema(customModules=['WMCore.WMBS',
                                      'WMCore.ResourceControl',
                                      'WMCore.Agent.Database'],
                       useDefault=False)

        self.couchdbname = "jobcreator_t"
        for nameTemplate, couchApp in (("%s/jobs", "JobDump"),
                                       ("%s/fwjrs", "FWJRDump")):
            init.setupCouch(nameTemplate % self.couchdbname, couchApp)
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        thread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thread.logger,
                                     dbinterface=thread.dbi)

        # First pass: create the WMBS locations
        newLocation = self.daoFactory(classname="Locations.New")
        for siteName in self.sites:
            newLocation.execute(siteName=siteName, pnn=siteName)

        # Second pass: create the same sites in resourceControl
        self.resourceControl = ResourceControl()
        for siteName in self.sites:
            self.resourceControl.insertSite(siteName=siteName,
                                            pnn=siteName,
                                            ceName=siteName)
            self.resourceControl.insertThreshold(siteName=siteName,
                                                 taskType='Processing',
                                                 maxSlots=10000,
                                                 pendingSlots=10000)

        self._setup = True
        self._teardown = False

        self.testDir = init.generateWorkDir()
        self.cwd = os.getcwd()

        # Register the heartbeat for the component under test
        self.componentName = 'JobCreator'
        heartbeat = HeartbeatAPI(self.componentName)
        self.heartbeatAPI = heartbeat
        heartbeat.registerComponent()

        # Under PY3 the old assertItemsEqual name is pointed at
        # assertCountEqual so the tests can keep using it.
        if PY3:
            self.assertItemsEqual = self.assertCountEqual
Beispiel #3
0
    def setUp(self):
        """
        setup for test.

        Opens the database connection with destroyAllDatabase=True, calls
        tearDown() once before the schema is installed, sets up the two
        bossair_t couch databases, registers every site in self.sites plus
        'T3_US_Xanadu' with ResourceControl, creates a WMBS user, generates a
        work directory and registers the 'test' and 'JobTracker' heartbeats.
        """
        thread = threading.currentThread()

        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection(destroyAllDatabase=True)
        # NOTE(review): tearDown() is run here on purpose, presumably to
        # clear leftovers of an earlier run before the schema is created.
        self.tearDown()
        init.setSchema(customModules=["WMCore.WMBS",
                                      "WMCore.BossAir",
                                      "WMCore.ResourceControl",
                                      "WMCore.Agent.Database"],
                       useDefault=False)
        for couchName, couchApp in (("bossair_t/jobs", "JobDump"),
                                    ("bossair_t/fwjrs", "FWJRDump")):
            init.setupCouch(couchName, couchApp)

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thread.logger,
                                     dbinterface=thread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Sites from self.sites use the SimpleCondorPlugin
        resourceControl = ResourceControl()
        for siteName in self.sites:
            resourceControl.insertSite(siteName=siteName,
                                       pnn='%s_PNN' % siteName,
                                       cmsName=siteName,
                                       ceName=siteName,
                                       plugin="SimpleCondorPlugin",
                                       pendingSlots=1000,
                                       runningSlots=2000)
            resourceControl.insertThreshold(siteName=siteName,
                                            taskType='Processing',
                                            maxSlots=1000,
                                            pendingSlots=1000)

        # One extra site that is wired to the TestPlugin
        extraSite = 'T3_US_Xanadu'
        resourceControl.insertSite(siteName=extraSite,
                                   pnn='%s_PNN' % extraSite,
                                   cmsName=extraSite,
                                   ceName=extraSite,
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName=extraSite,
                                        taskType='Processing',
                                        maxSlots=10000,
                                        pendingSlots=10000)

        # Create user
        userAction = self.daoFactory(classname="Users.New")
        userAction.execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

        # The login name of the account running the tests
        self.user = getpass.getuser()

        # Working dir used to keep track of error and log files from condor
        self.testDir = init.generateWorkDir()

        # Two heartbeats are registered: 'test' and 'JobTracker'
        self.heartbeatAPI = HeartbeatAPI('test')
        self.heartbeatAPI.registerComponent()
        self.heartbeatAPI2 = HeartbeatAPI('JobTracker')
        self.heartbeatAPI2.registerComponent()
Beispiel #4
0
    def setUp(self):
        """
        _setUp_

        setUp function for unittest

        Prepares logging, the database connection and schema, the couch
        database named by self.couchDB, the DBSBuffer and DBS3Buffer DAO
        factories, three buffer locations and the 'JobSubmitter' heartbeat.
        It then writes self.configString to a PSet file in the work
        directory, stores it in a ConfigCache and sets self.configURL to
        "<COUCHURL>;;<couchDB>;;<couch id>".

        Requires the COUCHURL environment variable (KeyError otherwise).
        """
        # Set constants
        self.couchDB = "config_test"
        # Placeholder value; replaced at the end of this method
        self.configURL = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'],
            useDefault=False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database",
            logger=myThread.logger,
            dbinterface=myThread.dbi)
        self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                         logger=myThread.logger,
                                         dbinterface=myThread.dbi)

        locationAction = self.bufferFactory(
            classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="se1.cern.ch")
        locationAction.execute(siteName="se1.fnal.gov")
        locationAction.execute(siteName="malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName=self.couchDB)
        configCache.createUserGroup(groupname="testGroup", username='******')
        self.testDir = self.testInit.generateWorkDir()

        # Use a context manager so the file is closed (and flushed) even if
        # the write fails, before the config cache reads it back.
        psetPath = os.path.join(self.testDir, "PSet.txt")
        with open(psetPath, 'w') as psetFile:
            psetFile.write(self.configString)

        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB,
                                         configCache.getCouchID())
        return
Beispiel #5
0
    def setUp(self):
        """
        _setUp_

        Prepare logging, the database connection and the schema, insert each
        entry of self.sites as a WMBS location (with 1000 pending slots) and
        as a ResourceControl site with a 'Processing' threshold, generate a
        work directory and register a heartbeat for every name in
        self.components.
        """
        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        init.setSchema(customModules=["WMCore.WMBS",
                                      'WMCore.MsgService',
                                      'WMCore.ResourceControl',
                                      'WMCore.ThreadPool',
                                      'WMCore.Agent.Database'],
                       useDefault=False)

        thread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thread.logger,
                                     dbinterface=thread.dbi)

        newLocation = self.daoFactory(classname="Locations.New")
        setPendingSlots = self.daoFactory(classname="Locations.SetPendingSlots")

        # First pass: WMBS locations and their pending slots
        for siteName in self.sites:
            newLocation.execute(siteName=siteName,
                                seName='se.%s' % (siteName),
                                ceName=siteName)
            setPendingSlots.execute(siteName=siteName, pendingSlots=1000)

        # Second pass: the same sites in resourceControl
        resourceControl = ResourceControl()
        for siteName in self.sites:
            resourceControl.insertSite(siteName=siteName,
                                       seName='se.%s' % (siteName),
                                       ceName=siteName)
            resourceControl.insertThreshold(siteName=siteName,
                                            taskType='Processing',
                                            maxSlots=10000,
                                            pendingSlots=10000)

        self.testDir = init.generateWorkDir()

        # One heartbeat registration per component; the API objects are not
        # kept on the test case.
        for componentName in self.components:
            HeartbeatAPI(componentName).registerComponent()
Beispiel #6
0
    def setUp(self):
        """
        _setUp_

        Prepare logging, the database connection and the schema through
        TestInit, set up the three couch databases, create the WMBS and
        BossAir DAO factories, generate a work directory and register the
        'JobSubmitter' heartbeat.
        """
        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        init.setSchema(customModules=["WMCore.WMBS",
                                      "WMCore.BossAir",
                                      "WMCore.ResourceControl",
                                      "WMCore.Agent.Database"])
        for couchName, couchApp in (("jobsubmitter_t/jobs", "JobDump"),
                                    ("jobsubmitter_t/fwjrs", "FWJRDump"),
                                    ("wmagent_summary_t", "WMStats")):
            init.setupCouch(couchName, couchApp)

        # The logger and DB interface are read from the current thread object
        # and handed to both factories.
        thread = threading.currentThread()
        sharedArgs = {"logger": thread.logger, "dbinterface": thread.dbi}
        self.daoFactory = DAOFactory(package="WMCore.WMBS", **sharedArgs)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir", **sharedArgs)

        self.testDir = init.generateWorkDir()

        # Register the heartbeat for the component under test
        self.componentName = 'JobSubmitter'
        heartbeat = HeartbeatAPI(self.componentName)
        self.heartbeatAPI = heartbeat
        heartbeat.registerComponent()
Beispiel #7
0
    def setUp(self):
        """
        _setUp_

        Prepare logging, the database connection and the schema, set up the
        jobs/fwjrs couch databases and insert every entry of self.sites both
        as a WMBS location and as a ResourceControl site with a 'Processing'
        threshold.  A work directory is generated and the 'JobCreator'
        heartbeat is registered.
        """
        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        init.setSchema(customModules=['WMCore.WMBS',
                                      'WMCore.ResourceControl',
                                      'WMCore.Agent.Database'],
                       useDefault=False)

        self.couchdbname = "jobcreator_t"
        for nameTemplate, couchApp in (("%s/jobs", "JobDump"),
                                       ("%s/fwjrs", "FWJRDump")):
            init.setupCouch(nameTemplate % self.couchdbname, couchApp)

        thread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thread.logger,
                                     dbinterface=thread.dbi)

        # First pass: create the WMBS locations
        newLocation = self.daoFactory(classname="Locations.New")
        for siteName in self.sites:
            newLocation.execute(siteName=siteName, seName=siteName)

        # Second pass: create the same sites in resourceControl
        self.resourceControl = ResourceControl()
        for siteName in self.sites:
            self.resourceControl.insertSite(siteName=siteName,
                                            seName=siteName,
                                            ceName=siteName)
            self.resourceControl.insertThreshold(siteName=siteName,
                                                 taskType='Processing',
                                                 maxSlots=10000,
                                                 pendingSlots=10000)

        self._setup = True
        self._teardown = False

        self.testDir = init.generateWorkDir()
        self.cwd = os.getcwd()

        # Register the heartbeat for the component under test
        self.componentName = 'JobCreator'
        heartbeat = HeartbeatAPI(self.componentName)
        self.heartbeatAPI = heartbeat
        heartbeat.registerComponent()
Beispiel #8
0
    def prepareToStart(self):
        """
        _prepareToStart_

        returns: Nothing

        Drives the start-up sequence of the component in one place so tests
        can call it directly: in-thread init, optional heartbeat
        registration, pre-initialization, then initialization and
        post-initialization (inside the thread's transaction when
        sql_transaction is set on the current thread), and finally resuming
        the worker threads.  self.state goes from 'initialize' to 'active'.
        """
        self.state = 'initialize'
        self.initInThread()

        # note: every component gets a (unique) name:
        # self.config.Agent.componentName
        agentConfig = self.config.Agent
        logging.info(">>>Registering Component - %s",
                     agentConfig.componentName)

        # The heartbeat is on unless the config explicitly disables it
        heartbeatEnabled = getattr(agentConfig, "useHeartbeat", True)
        if heartbeatEnabled:
            heartbeat = HeartbeatAPI(agentConfig.componentName)
            self.heartbeatAPI = heartbeat
            heartbeat.registerComponent()

        logging.info('>>>Starting initialization')
        logging.info('>>>Setting default transaction')

        thread = threading.currentThread()
        self.preInitialization()

        # sql_transaction is read again before the commit on purpose: the
        # flag is looked up on the thread each time, not cached.
        if thread.sql_transaction:
            thread.transaction.begin()

        self.initialization()
        self.postInitialization()

        if thread.sql_transaction:
            thread.transaction.commit()

        logging.info('>>>Committing default transaction')

        logging.info(">>>Starting worker threads")
        thread.workerThreadManager.resumeWorkers()

        logging.info(">>>Initialization finished!\n")
        # wait for messages
        self.state = 'active'
Beispiel #9
0
    def setUp(self):
        """
        _setUp_

        setUp function for unittest

        Prepares logging, the database connection and schema, the couch
        database named by self.couchDB, the DBSBuffer DAO factory, three
        buffer locations and the 'JobSubmitter' heartbeat.  It then writes
        self.configString to a PSet file in the work directory, stores it in
        a ConfigCache and sets self.configURL to
        "<COUCHURL>;;<couchDB>;;<couch id>".

        Requires the COUCHURL environment variable (KeyError otherwise).
        """
        # Set constants
        self.couchDB      = "config_test"
        # Placeholder value; replaced at the end of this method
        self.configURL    = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules =
                                ["WMComponent.DBS3Buffer",
                                 'WMCore.Agent.Database'],
                                useDefault = False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)

        locationAction = self.bufferFactory(classname = "DBSBufferFiles.AddLocation")
        locationAction.execute(siteName = "se1.cern.ch")
        locationAction.execute(siteName = "se1.fnal.gov")
        locationAction.execute(siteName = "malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI  = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDB)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        self.testDir = self.testInit.generateWorkDir()

        # Use a context manager so the file is closed (and flushed) even if
        # the write fails, before the config cache reads it back.
        psetPath = os.path.join(self.testDir, "PSet.txt")
        with open(psetPath, 'w') as psetFile:
            psetFile.write(self.configString)

        configCache.addConfig(newConfig = psetPath, psetHash = None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"],
                                         self.couchDB,
                                         configCache.getCouchID())

        return
Beispiel #10
0
    def __init__(self, slaveClassName, totalSlaves, componentDir,
                 config, slaveInit = None, namespace = None):
        """
        __init__

        Constructor for the process pool.  The slave class name must be based
        inside the WMComponent namespace.  For examples, the JobAccountant would
        pass in 'JobAccountant.AccountantWorker' to run the AccountantWorker
        class.  All log files will be stored in the component directory that is
        passed in.  Each slave will have its own log file.

        Note that the config is only used to determine database connection
        parameters.  It is not passed to the slave class.  The slaveInit
        parameter will be serialized and passed to the slave class's
        constructor.

        Parameters:
          slaveClassName - name of the class each slave runs
          totalSlaves    - number of slaves to create
          componentDir   - component directory stored on the pool
          config         - configuration; config.Agent is read here for the
                           heartbeat settings
          slaveInit      - optional initialisation data for the slaves
          namespace      - optional namespace stored on the pool

        The slaves are created at the end of the constructor through
        self.createSlaves().
        """
        self.enqueueIndex = 0
        self.dequeueIndex = 0
        self.runningWork  = 0

        #Use the Services.Requests JSONizer, which handles __to_json__ calls
        self.jsonHandler = JSONRequests()

        # heartbeat should be registered at this point
        if getattr(config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))

        self.slaveClassName = slaveClassName
        self.componentDir   = componentDir
        self.config         = config

        # Name of the interpreter used to launch the slaves, following the
        # pythonA.B naming convention (e.g. python2.7).  sys.version_info
        # always carries integer major/minor fields, so the string is built
        # unconditionally; a truthiness test on the minor version would
        # wrongly reject every X.0 release.
        self.versionString = "python%i.%i" % (sys.version_info[0],
                                              sys.version_info[1])

        self.workers = []
        self.nSlaves = totalSlaves
        self.slaveInit = slaveInit
        self.namespace = namespace


        # Now actually create the slaves
        self.createSlaves()


        return
Beispiel #11
0
    def setUp(self):
        """
        _setUp_

        Prepare logging, the database connection and the
        WMCore.Agent.Database schema, then create the HeartbeatAPI instance
        for "testComponent" that the tests use.
        """
        init = TestInit(__file__)
        self.testInit = init
        # (logLevel = logging.SQLDEBUG was noted here as an option)
        init.setLogging()
        init.setDatabaseConnection()
        init.setSchema(useDefault=False,
                       customModules=["WMCore.Agent.Database"])
        self.heartbeat = HeartbeatAPI("testComponent")
Beispiel #12
0
    def setUp(self):
        """
        Opens the database connection with destroyAllDatabase=True, installs
        the schema, sets up the two jobsubmitter_t couch databases, registers
        every entry of self.sites with ResourceControl (CondorPlugin) plus a
        'Processing' threshold, generates a work directory and registers the
        'JobSubmitter' heartbeat.
        """
        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection(destroyAllDatabase=True)
        init.setSchema(
            customModules=[
                "WMCore.WMBS",
                "WMCore.BossAir",
                "WMCore.ResourceControl",
                "WMCore.Agent.Database",
            ],
            useDefault=False,
        )
        for couchName, couchApp in (
            ("jobsubmitter_t/jobs", "JobDump"),
            ("jobsubmitter_t/fwjrs", "FWJRDump"),
        ):
            init.setupCouch(couchName, couchApp)

        thread = threading.currentThread()
        self.daoFactory = DAOFactory(
            package="WMCore.WMBS",
            logger=thread.logger,
            dbinterface=thread.dbi,
        )

        # Neither DAO is used further in this method; the lookups are kept
        # because instantiating them may have side effects - TODO confirm.
        locationAction = self.daoFactory(classname="Locations.New")
        locationSlots = self.daoFactory(classname="Locations.SetJobSlots")

        # The login name of the account running the tests
        self.user = getpass.getuser()

        self.ceName = "127.0.0.1"

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for siteName in self.sites:
            resourceControl.insertSite(
                siteName=siteName,
                seName="se.%s" % (siteName),
                ceName=siteName,
                plugin="CondorPlugin",
                pendingSlots=10000,
                runningSlots=20000,
                cmsName=siteName,
            )
            resourceControl.insertThreshold(
                siteName=siteName,
                taskType="Processing",
                maxSlots=10000,
            )

        self.testDir = init.generateWorkDir()

        # Register the heartbeat for the component under test
        self.componentName = "JobSubmitter"
        heartbeat = HeartbeatAPI(self.componentName)
        self.heartbeatAPI = heartbeat
        heartbeat.registerComponent()
Beispiel #13
0
    def prepareToStart(self):
        """
        _prepareToStart_

        returns: Nothing

        Starts the initialization procedure. It is mainly an aggregation method
        so it can easily used in tests.

        Sequence: initInThread(), optional heartbeat registration (enabled
        unless config.Agent.useHeartbeat is false), preInitialization(), then
        initialization() and postInitialization() wrapped in the current
        thread's transaction when thread.sql_transaction is set, and finally
        the worker threads are resumed.  self.state is 'initialize' while
        this runs and 'active' once it has finished.
        """
        self.state = 'initialize'
        self.initInThread()
        # note: every component gets a (unique) name:
        # self.config.Agent.componentName
        # The name is passed as a logging argument so the message is only
        # formatted when the record is actually emitted.
        logging.info(">>>Registering Component - %s",
                     self.config.Agent.componentName)

        if getattr(self.config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(self.config.Agent.componentName)
            self.heartbeatAPI.registerComponent()

        logging.info('>>>Starting initialization')

        logging.info('>>>Setting default transaction')
        myThread = threading.currentThread()

        self.preInitialization()

        if myThread.sql_transaction:
            myThread.transaction.begin()

        self.initialization()
        self.postInitialization()

        # The flag is read again here rather than cached, as in the begin
        # step above.
        if myThread.sql_transaction:
            myThread.transaction.commit()

        logging.info('>>>Committing default transaction')

        logging.info(">>>Starting worker threads")
        myThread.workerThreadManager.resumeWorkers()

        logging.info(">>>Initialization finished!\n")
        # wait for messages
        self.state = 'active'
Beispiel #14
0
    def prepareWorker(self, worker, idleTime):
        """
        Prepares a worker thread before running

        Sets the worker's idle time and component, assigns it a slaveid of
        the form "<wtmnumber>-<slavecounter>", wires up the terminate/pause/
        resume synchronisation hooks and, when the component config has an
        Agent section whose useHeartbeat is not disabled, attaches a
        HeartbeatAPI for the component name.

        Parameters:
          worker   - the worker object to configure (attributes are set on it)
          idleTime - value stored as worker.idleTime
        """
        # Work timing
        worker.idleTime = idleTime
        worker.component = self.component

        # The counter update and the id assignment happen under the lock; the
        # with-block guarantees the lock is released even if one of them
        # raises.
        with self.lock:
            self.slavecounter += 1
            worker.slaveid = "%s-%s" % (self.wtmnumber, self.slavecounter)

        # Thread synchronisation
        worker.notifyTerminate = self.terminateSlaves
        worker.terminateCallback = self.slaveTerminateCallback
        worker.notifyPause = self.pauseSlaves
        worker.notifyResume = self.resumeSlaves

        # Heartbeat is on by default but only possible with an Agent section
        componentConfig = self.component.config
        if hasattr(componentConfig, "Agent") and \
                getattr(componentConfig.Agent, "useHeartbeat", True):
            worker.heartbeatAPI = HeartbeatAPI(componentConfig.Agent.componentName)
Beispiel #15
0
    def testHeartbeat(self):
        """
        _testHeartbeat_

        Registers two components and, after each worker heartbeat update,
        checks the number and order of the entries returned by
        getHeartbeatInfo() and their 'worker_name' values.  Ends by recording
        a worker error and checking the reported 'error_message'.
        """
        def checkWorkers(api, expectedNames):
            """Fetch the heartbeat info once and compare the worker names."""
            info = api.getHeartbeatInfo()
            self.assertEqual(len(info), len(expectedNames))
            for position, expected in enumerate(expectedNames):
                self.assertEqual(info[position]['worker_name'], expected)

        # First component: nothing is reported until a worker checks in
        firstApi = HeartbeatAPI("testComponent")
        firstApi.pollInterval = 10
        firstApi.registerComponent()
        self.assertEqual(firstApi.getHeartbeatInfo(), [])

        firstApi.updateWorkerHeartbeat("testWorker")
        checkWorkers(firstApi, ["testWorker"])
        time.sleep(1)

        firstApi.updateWorkerHeartbeat("testWorker2")
        checkWorkers(firstApi, ["testWorker2"])

        time.sleep(1)
        firstApi.updateWorkerHeartbeat("testWorker")
        checkWorkers(firstApi, ["testWorker"])

        # Second component: from here on two entries are expected
        secondApi = HeartbeatAPI("test2Component")
        secondApi.pollInterval = 20
        secondApi.registerComponent()
        time.sleep(1)
        secondApi.updateWorkerHeartbeat("test2Worker")
        checkWorkers(secondApi, ["testWorker", "test2Worker"])

        time.sleep(1)
        secondApi.updateWorkerHeartbeat("test2Worker2")
        checkWorkers(secondApi, ["testWorker", "test2Worker2"])

        time.sleep(1)
        secondApi.updateWorkerHeartbeat("test2Worker")
        checkWorkers(secondApi, ["testWorker", "test2Worker"])

        # An error recorded for a worker shows up on the second entry
        secondApi.updateWorkerError("test2Worker", "Error1")
        errorInfo = secondApi.getHeartbeatInfo()
        self.assertEqual(errorInfo[1]['error_message'], "Error1")
Beispiel #16
0
class ProcessPool:
    """
    _ProcessPool_

    Starts a fixed number of slave subprocesses, hands each of them the
    JSON-encoded database configuration and optional init data on stdin, and
    keeps a WorkerProcess wrapper for every one of them in self.workers.
    """

    def __init__(self, slaveClassName, totalSlaves, componentDir,
                 config, slaveInit = None, namespace = None):
        """
        __init__

        Constructor for the process pool.  The slave class name must be based
        inside the WMComponent namespace.  For examples, the JobAccountant would
        pass in 'JobAccountant.AccountantWorker' to run the AccountantWorker
        class.  All log files will be stored in the component directory that is
        passed in.  Each slave will have its own log file.

        Note that the config is only used to determine database connection
        parameters.  It is not passed to the slave class.  The slaveInit
        parameter will be serialized and passed to the slave class's
        constructor.
        """
        # Set before anything that can raise so __del__ always finds it
        self.workers = []

        self.enqueueIndex = 0
        self.dequeueIndex = 0
        self.runningWork  = 0

        #Use the Services.Requests JSONizer, which handles __to_json__ calls
        self.jsonHandler = JSONRequests()

        # heartbeat should be registered at this point
        if getattr(config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))

        self.slaveClassName = slaveClassName
        self.componentDir   = componentDir
        self.config         = config

        # Interpreter name used to launch the slaves, following the pythonA.B
        # naming convention (e.g. python2.7).  sys.version_info always holds
        # integer major/minor fields, so no fallback is needed; a truthiness
        # test on the minor version would wrongly reject every X.0 release.
        self.versionString = "python%i.%i" % (sys.version_info[0],
                                              sys.version_info[1])

        self.nSlaves = totalSlaves
        self.slaveInit = slaveInit
        self.namespace = namespace

        # Now actually create the slaves
        self.createSlaves()

        return

    def _dialectFromUrl(self, connectUrl):
        """
        Map the scheme of a database connect URL to the dialect name.

        Returns "MySQL", "Oracle" or "SQLite" (scheme matched
        case-insensitively).  Raises ValueError when the URL has no ':' or
        when the scheme is not one of the three supported ones.
        """
        scheme, separator, _rest = connectUrl.partition(":")
        if not separator:
            raise ValueError("Database connectUrl has no dialect prefix: %r"
                             % (connectUrl,))

        supported = {"mysql": "MySQL", "oracle": "Oracle", "sqlite": "SQLite"}
        dialect = supported.get(scheme.lower())
        if dialect is None:
            raise ValueError("Unsupported database dialect %r in connectUrl"
                             % (scheme,))
        return dialect

    def createSlaves(self):
        """
        _createSlaves_

        Create the slaves by using the values from __init__()
        Moving it into a separate function allows us to restart
        all of them.

        Every slave is launched as
        "<versionString> <this file> <slaveClassName>" and receives two lines
        on stdin: the JSON-encoded database config and the JSON-encoded
        slaveInit (an empty line when slaveInit is None).  Raises ValueError
        for an unsupported database dialect.
        """
        totalSlaves = self.nSlaves
        config      = self.config

        slaveArgs = [self.versionString, __file__, self.slaveClassName]

        dbConfig = {"dialect": self._dialectFromUrl(config.CoreDatabase.connectUrl),
                    "connectUrl": config.CoreDatabase.connectUrl,
                    "socket": getattr(config.CoreDatabase, "socket", None),
                    "componentDir": self.componentDir}
        if self.namespace:
            # Then add a namespace to the config
            dbConfig['namespace'] = self.namespace
        encodedDBConfig = self.jsonHandler.encode(dbConfig)

        if self.slaveInit is None:
            encodedSlaveInit = None
        else:
            encodedSlaveInit = self.jsonHandler.encode(self.slaveInit)

        count = 0
        while count < totalSlaves:
            #For each worker you want create a slave process
            #That process calls this code (WMCore.ProcessPool) and opens
            #A process pool that loads the designated class
            slaveProcess = subprocess.Popen(slaveArgs, stdin = subprocess.PIPE,
                                            stdout = subprocess.PIPE)
            # NOTE(review): these are str writes to a default (binary) pipe;
            # on Python 3 this would need bytes or a text-mode pipe - confirm
            # before porting.
            slaveProcess.stdin.write("%s\n" % encodedDBConfig)

            if encodedSlaveInit is None:
                slaveProcess.stdin.write("\n")
            else:
                slaveProcess.stdin.write("%s\n" % encodedSlaveInit)

            slaveProcess.stdin.flush()
            self.workers.append(WorkerProcess(subproc = slaveProcess))
            workerName = self._subProcessName(self.slaveClassName, count)

            if getattr(self.config.Agent, "useHeartbeat", True):
                self.heartbeatAPI.updateWorkerHeartbeat(workerName,
                                                        pid = slaveProcess.pid)
            count += 1

        return

    def _subProcessName(self, slaveClassName, sequence):
        """ subProcessName for heartbeat
            could change to use process ID as a suffix

            Returns "<slaveClassName>_<sequence + 1>", i.e. names are
            numbered starting from 1.
        """
        return "%s_%s" % (slaveClassName, sequence + 1)

    def __del__(self):
        """
        __del__

        Call delete() on every worker and empty the worker list.  Errors
        from individual workers are ignored so one failing worker cannot
        stop the clean-up of the others.
        """
        # getattr: __init__ may have failed before self.workers existed
        for worker in getattr(self, "workers", []):
            try:
                worker.delete()
            except Exception:
                # Deliberately best-effort: raising from a destructor would
                # only produce an ignored-exception warning.
                pass

        self.workers = []

        return
Beispiel #17
0
class DBSUploadTest(unittest.TestCase):
    """
    TestCase for DBSUpload module

    Note:
      This fails if you use the in-memory syntax for sqlite
      i.e. (DATABASE = sqlite://)
    """

    _maxMessage = 10

    def setUp(self):
        """
        _setUp_

        Build the fixture used by every test in this class: logging, the
        database connection and schema, the couch database, three buffer
        locations, a registered heartbeat component and a config cache
        document holding a dummy PSet file.
        """
        # Set constants
        self.couchDB = "config_test"
        # Placeholder value; overwritten at the end of setUp once the config
        # cache document has been saved
        self.configURL = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "WMCore.Agent.Database"], useDefault=False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database", logger=myThread.logger, dbinterface=myThread.dbi
        )

        # Register the sites; getFiles() places its dummy files at one of them
        locationAction = self.bufferFactory(classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="se1.cern.ch")
        locationAction.execute(siteName="se1.fnal.gov")
        locationAction.execute(siteName="malpaquet")

        # Set heartbeat
        self.componentName = "JobSubmitter"
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDB)
        configCache.createUserGroup(groupname="testGroup", username="******")
        self.testDir = self.testInit.generateWorkDir()

        # Write the dummy PSet; the context manager closes the file even when
        # the write raises
        psetPath = os.path.join(self.testDir, "PSet.txt")
        with open(psetPath, "w") as psetFile:
            psetFile.write(self.configString)

        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()
        # Three ';;'-separated fields: couch URL, couch database name and the
        # couch ID of the saved config
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB, configCache.getCouchID())

        return

    def tearDown(self):
        """
        _tearDown_

        Undo what setUp created: clear the database modules installed by
        setSchema, then remove the couch database and the work directory so
        that nothing leaks from one test into the next.
        """

        self.testInit.clearDatabase(modules=["WMComponent.DBS3Buffer", "WMCore.Agent.Database"])
        # setUp calls setupCouch() and generateWorkDir(); release both as well
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()

    def createConfig(self):
        """
        _createConfig_

        Assemble and return the Configuration used by the component under
        test, with General, Agent, CoreDatabase, DBSUpload, DBSInterface and
        Alert settings filled in.
        """
        cfg = Configuration()
        currentDir = os.getcwd()

        # General and agent-wide settings
        cfg.section_("General")
        cfg.General.workDir = os.getenv("TESTDIR", currentDir)

        cfg.section_("Agent")
        cfg.Agent.componentName = "DBSUpload"
        cfg.Agent.useHeartbeat = False

        # Database connection details are taken from the environment
        cfg.section_("CoreDatabase")
        cfg.CoreDatabase.connectUrl = os.getenv("DATABASE")
        cfg.CoreDatabase.socket = os.getenv("DBSOCK")

        # The component itself
        cfg.component_("DBSUpload")
        uploadSettings = (
            ("pollInterval", 10),
            ("logLevel", "ERROR"),
            ("maxThreads", 1),
            ("namespace", "WMComponent.DBSUpload.DBSUpload"),
            ("componentDir", os.path.join(currentDir, "Components")),
            ("workerThreads", 4),
        )
        for option, value in uploadSettings:
            setattr(cfg.DBSUpload, option, value)

        # Global and local DBS endpoints plus the block limits
        cfg.section_("DBSInterface")
        interfaceSettings = (
            ("globalDBSUrl", "http://vocms09.cern.ch:8880/cms_dbs_int_local_xx_writer/servlet/DBSServlet"),
            ("globalDBSVersion", "DBS_2_0_9"),
            ("DBSUrl", "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"),
            ("DBSVersion", "DBS_2_0_9"),
            ("DBSBlockMaxFiles", 10),
            ("DBSBlockMaxSize", 9999999999),
            ("DBSBlockMaxTime", 10000),
            ("MaxFilesToCommit", 10),
        )
        for option, value in interfaceSettings:
            setattr(cfg.DBSInterface, option, value)

        # Alerts messaging framework: the work (alerts) and control channel
        # addresses the component sends to, i.e. where
        # AlertProcessor:Receiver listens
        cfg.section_("Alert")
        cfg.Alert.address = "tcp://127.0.0.1:5557"
        cfg.Alert.controlAddr = "tcp://127.0.0.1:5559"
        # Upload queue size threshold for alerts (trac ticket #1628)
        cfg.DBSUpload.alertUploadQueueSize = 2000

        return cfg

    def getFiles(self, name, tier, nFiles=12, site="malpaquet"):
        """
        Create nFiles dummy DBSBuffer files in dataset /name/name/tier at the
        given site, plus one child file in /name/name_2/RECO that has all of
        them as parents.

        Only the parent files are returned.
        """

        def buildFile(lfn, events, datasetPath, runEntry):
            # Construct one DBSBufferFile, fill in its metadata, create it
            # and assign its location
            bufferFile = DBSBufferFile(lfn=lfn, size=1024, events=events, checksums={"cksum": 1})
            bufferFile.setAlgorithm(
                appName=name, appVer="CMSSW_3_1_1", appFam="RECO", psetHash="GIBBERISH", configContent=self.configURL
            )
            bufferFile.setDatasetPath(datasetPath)
            bufferFile.addRun(Run(1, runEntry))
            bufferFile.setGlobalTag("aGlobalTag")
            bufferFile.create()
            bufferFile.setLocation(site)
            return bufferFile

        parentDataset = "/%s/%s/%s" % (name, name, tier)
        files = []
        for index in range(nFiles):
            files.append(buildFile("%s-%s-%i" % (name, site, index), 20, parentDataset, index))

        # The single child lives in its own dataset and points at every parent
        child = buildFile("%s-%s-child" % (name, site), 10, "/%s/%s_2/RECO" % (name, name), 45)
        child.addParents([parent["lfn"] for parent in files])

        return files

    @attr("integration")
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Do everything simply once
        Create dataset, algo, files, blocks,
        upload them,
        mark as done, finish them, migrate them
        Also check the timeout
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        # Short block timeout so the second poll cycle closes the open block
        config.DBSInterface.DBSBlockMaxTime = 3
        config.DBSUpload.pollInterval = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = "/%s/%s/%s" % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [("InGlobalDBS",), ("Open",)])

        # Check to see if datasets and algos are in local DBS
        result = listAlgorithms(apiRef=localAPI, patternExe=name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]["ExecutableName"], name)
        result = listPrimaryDatasets(apiRef=localAPI, match=name)
        self.assertEqual(result, [name])
        result = listProcessedDatasets(apiRef=localAPI, primary=name, dataTier="*")

        # Then check and see that the closed block made it into local DBS
        # The two blocks can come back in either order, hence the branch
        affectedBlocks = listBlocks(apiRef=localAPI, datasetPath=datasetPath)
        if affectedBlocks[0]["OpenForWriting"] == "0":
            self.assertEqual(affectedBlocks[1]["OpenForWriting"], "1")
            self.assertEqual(affectedBlocks[0]["NumberOfFiles"], 10)
            self.assertEqual(affectedBlocks[1]["NumberOfFiles"], 2)
        else:
            self.assertEqual(affectedBlocks[0]["OpenForWriting"], "1")
            self.assertEqual(affectedBlocks[1]["NumberOfFiles"], 10)
            self.assertEqual(affectedBlocks[0]["NumberOfFiles"], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef=localAPI, datasetPath=datasetPath)
        fileLFNs = [x["lfn"] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there: listing the child dataset
        # is expected to raise
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI, datasetPath="/%s/%s_2/%s" % (name, name, tier))
        except Exception:
            flag = True
        self.assertTrue(flag)

        # There should be one blocks in global
        # It should have ten files and be closed
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        for block in result:
            self.assertEqual(block["OpenForWriting"], "0")
            self.assertTrue(block["NumberOfFiles"] in [2, 10])

        # Okay, deep breath.  First round done
        # In the second round, the second block of the parent fileset should transfer
        # Make sure that the timeout functions work
        time.sleep(10)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [("InGlobalDBS",), ("InGlobalDBS",)])

        # Check to make sure all the files are in global
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI, datasetPath="/%s/%s_2/%s" % (name, name, tier))
        except Exception:
            flag = True
        # The outcome of the lookup was previously computed but never checked
        self.assertTrue(flag)
Beispiel #18
0
class DBSUploadTest(unittest.TestCase):
    """
    _DBSUploadTest_

    TestCase for DBSUpload module
    """

    _maxMessage = 10

    def setUp(self):
        """
        _setUp_

        Build the fixture used by every test in this class: logging, the
        database connection and schema, the couch database, DAO factories
        for both buffer packages, three buffer locations, a registered
        heartbeat component and a config cache document holding a dummy
        PSet file.
        """
        # Set constants
        self.couchDB = "config_test"
        # Placeholder value; overwritten at the end of setUp once the config
        # cache document has been saved
        self.configURL = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'],
            useDefault=False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database",
            logger=myThread.logger,
            dbinterface=myThread.dbi)
        # Used by injectWorkflow() to load the InsertWorkflow DAO
        self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                         logger=myThread.logger,
                                         dbinterface=myThread.dbi)

        # Register the sites; getFiles() places its dummy files at one of them
        locationAction = self.bufferFactory(
            classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="se1.cern.ch")
        locationAction.execute(siteName="se1.fnal.gov")
        locationAction.execute(siteName="malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName=self.couchDB)
        configCache.createUserGroup(groupname="testGroup", username='******')
        self.testDir = self.testInit.generateWorkDir()

        # Write the dummy PSet; the context manager closes the file even when
        # the write raises
        psetPath = os.path.join(self.testDir, "PSet.txt")
        with open(psetPath, 'w') as psetFile:
            psetFile.write(self.configString)

        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()
        # Three ';;'-separated fields: couch URL, couch database name and the
        # couch ID of the saved config
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB,
                                         configCache.getCouchID())
        return

    def tearDown(self):
        """
        _tearDown_

        Release everything setUp created, in order: the database contents,
        the couch database and the work directory.
        """
        cleanupSteps = ("clearDatabase", "tearDownCouch", "delWorkDir")
        for stepName in cleanupSteps:
            # Look each method up only when its turn comes, then invoke it
            getattr(self.testInit, stepName)()

    def createConfig(self):
        """
        _createConfig_

        Assemble and return the Configuration used by the component under
        test, with General, Agent, CoreDatabase, DBSUpload, DBSInterface and
        Alert settings filled in.
        """
        cfg = Configuration()
        currentDir = os.getcwd()

        # General and agent-wide settings
        cfg.section_("General")
        cfg.General.workDir = os.getenv("TESTDIR", currentDir)

        cfg.section_("Agent")
        cfg.Agent.componentName = 'DBSUpload'
        cfg.Agent.useHeartbeat = False

        # Database connection details are taken from the environment
        cfg.section_("CoreDatabase")
        cfg.CoreDatabase.connectUrl = os.getenv("DATABASE")
        cfg.CoreDatabase.socket = os.getenv("DBSOCK")

        # The component itself
        cfg.component_("DBSUpload")
        uploadSettings = (
            ('pollInterval', 10),
            ('logLevel', 'ERROR'),
            ('maxThreads', 1),
            ('namespace', 'WMComponent.DBSUpload.DBSUpload'),
            ('componentDir', os.path.join(currentDir, 'Components')),
            ('workerThreads', 4),
        )
        for option, value in uploadSettings:
            setattr(cfg.DBSUpload, option, value)

        # Global and local DBS endpoints
        cfg.section_("DBSInterface")
        interfaceSettings = (
            ('globalDBSUrl',
             'http://vocms09.cern.ch:8880/cms_dbs_int_local_xx_writer/servlet/DBSServlet'),
            ('globalDBSVersion', 'DBS_2_0_9'),
            ('DBSUrl',
             'http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet'),
            ('DBSVersion', 'DBS_2_0_9'),
            ('MaxFilesToCommit', 10),
        )
        for option, value in interfaceSettings:
            setattr(cfg.DBSInterface, option, value)

        # Alerts messaging framework: the work (alerts) and control channel
        # addresses the component sends to, i.e. where
        # AlertProcessor:Receiver listens
        cfg.section_("Alert")
        cfg.Alert.address = "tcp://127.0.0.1:5557"
        cfg.Alert.controlAddr = "tcp://127.0.0.1:5559"
        # Upload queue size threshold for alerts (trac ticket #1628)
        cfg.DBSUpload.alertUploadQueueSize = 2000

        return cfg

    def injectWorkflow(self,
                       workflowName='TestWorkflow',
                       taskPath='/TestWorkflow/ReadingEvents',
                       MaxWaitTime=10000,
                       MaxFiles=10,
                       MaxEvents=250000000,
                       MaxSize=9999999999):
        """
        _injectWorkflow_

        Insert a dummy workflow through the InsertWorkflow DAO of the
        DBS3Buffer factory and hand back whatever the DAO returns, which
        callers use as the workflow ID.
        """
        insertAction = self.buffer3Factory("InsertWorkflow")
        # The DAO takes its arguments positionally, in exactly this order
        daoArguments = (workflowName, taskPath, MaxWaitTime, MaxFiles,
                        MaxEvents, MaxSize)
        return insertAction.execute(*daoArguments)

    def getFiles(self,
                 name,
                 tier,
                 nFiles=12,
                 site="malpaquet",
                 workflowName=None,
                 taskPath=None,
                 noChild=False):
        """
        Create nFiles dummy DBSBuffer files in dataset /name/name/tier at the
        given site, all attached to a freshly injected workflow.  Unless
        noChild is set, also create one child file in /name/name_2/RECO that
        has all of them as parents.

        Only the parent files are returned.
        """
        # A custom workflow is injected only when both of its identifiers
        # were supplied; otherwise injectWorkflow's defaults are used
        if workflowName is None or taskPath is None:
            workflowId = self.injectWorkflow()
        else:
            workflowId = self.injectWorkflow(workflowName=workflowName,
                                             taskPath=taskPath)

        def buildFile(lfn, events, datasetPath, runEntry):
            # Construct one DBSBufferFile, fill in its metadata, create it
            # and assign its location
            bufferFile = DBSBufferFile(lfn=lfn,
                                       size=1024,
                                       events=events,
                                       checksums={'cksum': 1},
                                       workflowId=workflowId)
            bufferFile.setAlgorithm(appName=name,
                                    appVer="CMSSW_3_1_1",
                                    appFam="RECO",
                                    psetHash="GIBBERISH",
                                    configContent=self.configURL)
            bufferFile.setDatasetPath(datasetPath)
            bufferFile.addRun(Run(1, runEntry))
            bufferFile.setGlobalTag("aGlobalTag")
            bufferFile.create()
            bufferFile.setLocation(site)
            return bufferFile

        parentDataset = "/%s/%s/%s" % (name, name, tier)
        files = []
        for index in range(nFiles):
            files.append(
                buildFile('%s-%s-%i' % (name, site, index), 20, parentDataset,
                          index))

        if noChild:
            return files

        # The single child lives in its own dataset and points at every parent
        child = buildFile('%s-%s-child' % (name, site), 10,
                          "/%s/%s_2/RECO" % (name, name), 45)
        child.addParents([parent['lfn'] for parent in files])

        return files

    @attr('integration')
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Do everything simply once
        Create dataset, algo, files, blocks,
        upload them,
        mark as done, finish them, migrate them
        Also check the timeout

        Runs the poller three times: the first round uploads the closed
        parent block, the second (after a sleep) the remaining parent block,
        and the third makes the child dataset visible in local DBS.
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        # Inject a workflow with a 3 second maximum wait time
        self.injectWorkflow(MaxWaitTime=3)
        config.DBSUpload.pollInterval = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        # One API reference for local DBS, one for global DBS
        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('Open', )])

        # Check to see if datasets and algos are in local DBS
        result = listAlgorithms(apiRef=localAPI, patternExe=name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['ExecutableName'], name)
        result = listPrimaryDatasets(apiRef=localAPI, match=name)
        self.assertEqual(result, [name])
        # NOTE(review): this result is fetched but never checked
        result = listProcessedDatasets(apiRef=localAPI,
                                       primary=name,
                                       dataTier="*")

        # Then check and see that the closed block made it into local DBS
        # The two blocks can come back in either order, hence the branch
        affectedBlocks = listBlocks(apiRef=localAPI, datasetPath=datasetPath)
        if affectedBlocks[0]['OpenForWriting'] == '0':
            self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
        else:
            self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef=localAPI, datasetPath=datasetPath)
        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there: listing the child dataset
        # is expected to raise
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI,
                             datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # There should be one blocks in global
        # It should have ten files and be closed
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        for block in result:
            self.assertEqual(block['OpenForWriting'], '0')
            self.assertTrue(block['NumberOfFiles'] in [2, 10])

        # Okay, deep breath.  First round done
        # In the second round, the second block of the parent fileset should transfer
        # Make sure that the timeout functions work
        time.sleep(10)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', )])

        # Check to make sure all the files are in global
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI,
                             datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # Third round
        # Both of the parent blocks should have transferred
        # So the child block should now transfer
        testDBSUpload.algorithm()

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ),
                                  ('Open', )])

        # This time listing the child dataset must succeed and return
        # exactly one file
        flag = False
        try:
            result = listDatasetFiles(apiRef=localAPI,
                                      datasetPath='/%s/%s_2/%s' %
                                      (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertFalse(flag)

        self.assertEqual(len(result), 1)

        return

    @attr('integration')
    def testB_AlgoMigration(self):
        """
        _AlgoMigration_

        Test our ability to migrate multiple algos to global

        Do this by creating, mid-poll, two separate batches of files
        One with the same dataset but a different algo
        One with the same algo, but a different dataset
        See that they both get to global
        """
        #raise nose.SkipTest
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=20)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        # One API reference for local DBS, one for global DBS
        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)

        # Okay, by now, the first migration should have gone through.
        # Now create a second batch of files with the same dataset
        # but a different algo.
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch2-%i' % (name, i),
                                     size=1024,
                                     events=20,
                                     checksums={'cksum': 1},
                                     locations="malpaquet")
            # Different application name and PSet hash than the first batch
            testFile.setAlgorithm(appName="cmsRun",
                                  appVer="CMSSW_3_1_1",
                                  appFam=tier,
                                  psetHash="GIBBERISH_PART2",
                                  configContent=self.configURL)
            testFile.setDatasetPath(datasetPath)
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Have to do things twice to get parents
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be two blocks
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 2)

        # Now create another batch of files with the original algo
        # But in a different dataset
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch3-%i' % (name, i),
                                     size=1024,
                                     events=20,
                                     checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName=name,
                                  appVer="CMSSW_3_1_1",
                                  appFam=tier,
                                  psetHash="GIBBERISH",
                                  configContent=self.configURL)
            # Same name as before, but the processed dataset gets a _3 suffix
            testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Do it twice for parentage.
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI,
                            datasetPath='/%s/%s_3/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        # Well, all the blocks got there, so we're done
        return

    @attr('integration')
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks

        The poller is run with the abortStepTwo and then the abortStepThree
        config flags set, checking the block and file status after each
        attempt, and finally without either flag until all blocks are
        reported as InGlobalDBS.
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        # NOTE(review): the streams are saved here and restored at the end,
        # but nothing in this method replaces them in between
        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config=config)

        # Any exception from the aborted run is deliberately ignored; only
        # the resulting database state is checked
        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Aborting in step two should result in no results
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Second attempt: let step two through, abort in step three
        config.DBSUpload.abortStepTwo = False
        config.DBSUpload.abortStepThree = True
        testDBSUpload = DBSUploadPoller(config=config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Blocks exist now, but the files with dataset_algo 1 must still be
        # READY
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('Pending', ), ('Open', )])
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1"
        )[0].fetchall()
        for res in result:
            self.assertEqual(res[0], 'READY')

        # Third attempt: no abort flags set
        config.DBSUpload.abortStepThree = False
        self.injectWorkflow(MaxWaitTime=300)
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # After this, one block should have been uploaded, one should still be open
        # This is the result of the pending block updating, and the open block staying open
        result = myThread.dbi.processData(
            "SELECT status, id FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', 3), ('Open', 4)])

        # Check that one block got there
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['NumberOfFiles'], 10)
        self.assertEqual(result[0]['NumberOfEvents'], 200)
        self.assertEqual(result[0]['BlockSize'], 10240)

        # Check that ten files got there
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 10)

        # Drop the maximum wait time of every workflow to 1 and sleep past
        # it before polling again
        myThread.dbi.processData(
            "UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 1")
        testDBSUpload = DBSUploadPoller(config=config)
        time.sleep(3)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', )])

        # All twelve parent files should now be listed in global
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 12)

        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # The next poll adds a third, open block
        testDBSUpload.algorithm()
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ),
                                  ('Open', )])

        # Sleep, poll once more, and the third block is InGlobalDBS as well
        time.sleep(5)
        testDBSUpload.algorithm()
        time.sleep(2)
        result = myThread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ),
                                  ('InGlobalDBS', )])

        # The child dataset holds exactly one file in global
        result = listDatasetFiles(apiRef=globeAPI,
                                  datasetPath='/%s/%s_2/%s' %
                                  (name, name, tier))
        self.assertEqual(len(result), 1)

        sys.stdout = originalOut
        sys.stderr = originalErr

        return

    @attr('integration')
    def testD_Profile(self):
        """
        _Profile_

        Profile with cProfile and time various pieces

        NOTE(review): the unconditional return at the top disables this
        test; everything after it is currently unreachable.
        """
        # Early exit: the profiling code below never runs
        return
        config = self.createConfig()

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 500
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Run one poll cycle under cProfile and dump the stats to a file
        testDBSUpload = DBSUploadPoller(config=config)
        cProfile.runctx("testDBSUpload.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        # Print the stats sorted by cumulative time, restricted by 0.2
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)

        return

    @attr('integration')
    def testE_NoMigration(self):
        """
        _NoMigration_

        Run a single DBSUploadPoller cycle with global migration turned off
        and check the block and file statuses left in the buffer tables.
        """
        thread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=3)
        config.DBSInterface.doGlobalMigration = False
        config.DBSUpload.pollInterval = 4

        datasetName = "ThisIsATest_%s" % (makeUUID())
        dataTier = "RECO"
        fileCount = 12
        files = self.getFiles(name=datasetName,
                              tier=dataTier,
                              nFiles=fileCount)
        datasetPath = '/%s/%s/%s' % (datasetName, datasetName, dataTier)

        # The status DAO and both API references are still requested, in the
        # same order as before, even though the checks below do not use them.
        daoLoader = WMFactory("dbsUpload",
                              "WMComponent.DBSUpload.Database.Interface")
        dbinterface = daoLoader.loadObject("UploadToDBS")

        interface = DBSInterface(config=config)
        localAPI = interface.getAPIRef()
        globeAPI = interface.getAPIRef(globalRef=True)

        # One polling cycle
        poller = DBSUploadPoller(config=config)
        poller.algorithm()

        # Exactly two blocks are expected: one uploaded, one still open
        blockRows = thread.dbi.processData(
            "SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(blockRows), 2)
        self.assertEqual(blockRows, [('InGlobalDBS', ), ('Open', )])

        # Every file of the first dataset/algo pair must be marked GLOBAL
        fileRows = thread.dbi.processData(
            "SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1"
        )[0].fetchall()
        for row in fileRows:
            self.assertEqual(row[0], 'GLOBAL')

        return

    @attr('integration')
    def testF_DBSUploadQueueSizeCheckForAlerts(self):
        """
        Run one poller cycle with the upload-queue alert threshold set below
        the number of injected files.

        No real alert is verified: that would need mocking of the methods
        used by DBSUploadPoller.algorithm() -> DBSUploadPoller.uploadBlocks().
        As written the test is probably not deterministic; it only makes
        sure the code path is exercised.
        """
        queueSizeThreshold = 1
        config = self.createConfig()
        # Threshold the poller compares the upload queue size against
        config.DBSUpload.alertUploadQueueSize = queueSizeThreshold

        # Without injected files uploadBlocks returns immediately
        datasetName = "ThisIsATest_%s" % (makeUUID())
        dataTier = "RECO"
        fileCount = queueSizeThreshold + 1
        files = self.getFiles(name=datasetName,
                              tier=dataTier,
                              nFiles=fileCount)
        datasetPath = '/%s/%s/%s' % (datasetName, datasetName, dataTier)

        # Loading these components seems necessary, otherwise some of the
        # previous tests started failing
        daoLoader = WMFactory("dbsUpload",
                              "WMComponent.DBSUpload.Database.Interface")
        dbinterface = daoLoader.loadObject("UploadToDBS")

        interface = DBSInterface(config=config)
        localAPI = interface.getAPIRef()
        globeAPI = interface.getAPIRef(globalRef=True)

        # This call is where the alert should finally be triggered from
        poller = DBSUploadPoller(config)
        poller.algorithm()

        return

    def testG_closeSettingsPerWorkflow(self):
        """
        _closeSettingsPerWorkflow_

        Check that blocks are closed according to the limits (files, events,
        size, wait time) configured for each individual workflow.

        The test is currently disabled: its first statement always fails
        because no DBS2 mock is available, so nothing below it runs.
        """
        self.assertTrue(
            False, 'This unit test disabled since we do not have DBS2 mock')

        openBlocksSQL = "SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'"
        closedBlocksSQL = "SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'"
        globalFilesSQL = "SELECT id FROM dbsbuffer_file WHERE status = 'GLOBAL'"
        pendingFilesSQL = "SELECT * FROM dbsbuffer_file WHERE status = 'NOTUPLOADED'"

        myThread = threading.currentThread()

        def fetchRows(sql):
            # Run a query on the thread's database interface, return all rows
            return myThread.dbi.processData(sql)[0].fetchall()

        def checkBlockCounts(expectedOpen, expectedClosed):
            # Compare the number of not-yet-uploaded and uploaded blocks
            openRows = fetchRows(openBlocksSQL)
            closedRows = fetchRows(closedBlocksSQL)
            self.assertEqual(len(openRows), expectedOpen)
            self.assertEqual(len(closedRows), expectedClosed)

        def checkFileCounts(expectedGlobal, expectedPending):
            # Compare the number of uploaded and pending files; the pending
            # rows are returned for further inspection
            globalRows = fetchRows(globalFilesSQL)
            pendingRows = fetchRows(pendingFilesSQL)
            self.assertEqual(len(globalRows), expectedGlobal)
            self.assertEqual(len(pendingRows), expectedPending)
            return pendingRows

        config = self.createConfig()
        config.DBSInterface.doGlobalMigration = False

        # Scenario 1: limit by number of files, then timeout with no new files
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        self.injectWorkflow(workflowName=name,
                            taskPath='/%s/Test' % name,
                            MaxFiles=5)
        self.getFiles(name=name,
                      tier=tier,
                      nFiles=nFiles,
                      workflowName=name,
                      taskPath='/%s/Test' % name)

        # DAO used to look up the open blocks after the first cycle
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        # The poller module is imported here so its DBS interface could be
        # swapped at runtime (the swap itself is commented out)
        from WMComponent.DBSUpload import DBSUploadPoller as MockDBSUploadPoller
        #MockDBSUploadPoller.DBSInterface = DBS2Interface

        # First cycle: only the parent dataset is handled.  Three blocks are
        # created, two closed immediately and one left open.
        testDBSUpload = MockDBSUploadPoller.DBSUploadPoller(config=config)
        testDBSUpload.algorithm()
        openBlocks = dbinterface.findOpenBlocks()
        closedBlocks = fetchRows(closedBlocksSQL)
        self.assertEqual(len(openBlocks), 1)
        self.assertEqual(len(closedBlocks), 2)
        notUploadedFiles = checkFileCounts(12, 1)
        self.assertTrue('child' in notUploadedFiles[0][1])

        # Second cycle picks up the child file
        testDBSUpload.algorithm()
        checkBlockCounts(2, 2)
        checkFileCounts(13, 0)

        # A zero wait time must close every remaining block
        myThread.dbi.processData(
            "UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 0")
        testDBSUpload.algorithm()
        checkBlockCounts(0, 4)

        # Check the information that DBS received
        receivedBlocks = testDBSUpload.dbsInterface.blocks
        for blockName in receivedBlocks:
            block = receivedBlocks[blockName]
            self.assertEqual(block['OpenForWriting'], '0')
            self.assertTrue(block['nFiles'] in (1, 2, 5))

        # Scenario 2: limit by number of events, timeout with new files
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = 50
        self.injectWorkflow(workflowName=name,
                            taskPath='/%s/Test' % name,
                            MaxFiles=45,
                            MaxEvents=800,
                            MaxWaitTime=10000)
        self.getFiles(name=name,
                      tier=tier,
                      nFiles=nFiles,
                      workflowName=name,
                      taskPath='/%s/Test' % name)
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()
        checkBlockCounts(2, 5)

        # Make the open blocks look as if they were created 10001 seconds
        # ago, then add 20 new files
        creationTime = int(time.time() - 10001)
        myThread.dbi.processData(
            "UPDATE dbsbuffer_block SET create_time = %d WHERE status != 'InGlobalDBS'"
            % creationTime)
        self.getFiles(name=name + '2',
                      tier=tier,
                      nFiles=20,
                      workflowName=name,
                      taskPath='/%s/Test' % name,
                      noChild=True)

        # The timed-out block cannot take the new files, so a new one opens
        testDBSUpload.algorithm()
        checkBlockCounts(1, 7)
        receivedBlocks = testDBSUpload.dbsInterface.blocks
        for blockName in receivedBlocks:
            block = receivedBlocks[blockName]
            if name not in blockName:
                continue
            if block['OpenForWriting'] == '1':
                self.assertEqual(block['nFiles'], 20)
            else:
                self.assertTrue(block['events'] in (10, 200, 800))
                self.assertTrue(block['nFiles'] in (1, 10, 40))

        # Scenario 3: limit by block size
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = 10
        self.injectWorkflow(workflowName=name,
                            taskPath='/%s/Test' % name,
                            MaxFiles=45,
                            MaxEvents=800,
                            MaxSize=2048)
        self.getFiles(name=name,
                      tier=tier,
                      nFiles=nFiles,
                      workflowName=name,
                      taskPath='/%s/Test' % name)
        testDBSUpload.algorithm()
        receivedBlocks = testDBSUpload.dbsInterface.blocks
        for blockName in receivedBlocks:
            block = receivedBlocks[blockName]
            if name not in blockName:
                continue
            self.assertEqual(block['events'], 40)
            self.assertEqual(block['nFiles'], 2)
            self.assertEqual(block['size'], 2048)

        return
Beispiel #19
0
    def testAddComponent(self):
        """
        _testAddComponent_

        Register two components and their workers step by step, checking
        what the per-component and the global heartbeat lookups return.
        """

        def checkFields(rows, expectedByField):
            # Compare one column at a time, ignoring row order
            for field, expected in expectedByField:
                self.assertItemsEqual([row[field] for row in rows], expected)

        first = HeartbeatAPI("testComponent1",
                             pollInterval=60,
                             heartbeatTimeout=600)
        first.registerComponent()
        # A component without workers has no heartbeat rows yet
        self.assertEqual(first.getHeartbeatInfo(), [])

        for workerCount, workerName in enumerate(
                ("testWorker1", "testWorker2"), 1):
            first.registerWorker(workerName)
            self.assertEqual(len(first.getHeartbeatInfo()), workerCount)

        second = HeartbeatAPI("testComponent2",
                              pollInterval=30,
                              heartbeatTimeout=300)
        second.registerComponent()
        # Nothing of its own yet, but the global view shows both workers of
        # the first component
        self.assertEqual(second.getHeartbeatInfo(), [])
        self.assertEqual(len(second.getAllHeartbeatInfo()), 2)

        second.registerWorker("testWorker21")
        self.assertEqual(len(second.getHeartbeatInfo()), 1)
        self.assertEqual(len(second.getAllHeartbeatInfo()), 3)

        first.updateWorkerHeartbeat("testWorker1", "Running")
        first.updateWorkerHeartbeat("testWorker2", "Running")
        second.updateWorkerHeartbeat("testWorker21", "Running")
        self.assertEqual(len(first.getAllHeartbeatInfo()), 3)
        self.assertEqual(len(second.getAllHeartbeatInfo()), 3)

        firstRows = first.getHeartbeatInfo()
        secondRows = second.getHeartbeatInfo()
        self.assertEqual(len(firstRows), 2)
        self.assertEqual(len(secondRows), 1)

        checkFields(firstRows, (
            ("name", ["testComponent1", "testComponent1"]),
            ("worker_name", ["testWorker1", "testWorker2"]),
            ("state", ["Running", "Running"]),
            ("poll_interval", [60, 60]),
            ("update_threshold", [600, 600]),
        ))

        checkFields(secondRows, (
            ("name", ["testComponent2"]),
            ("worker_name", ["testWorker21"]),
            ("state", ["Running"]),
            ("poll_interval", [30]),
            ("update_threshold", [300]),
        ))
Beispiel #20
0
    def testHeartbeat(self):
        """
        _testHeartbeat_

        Send heartbeats for several workers of two components and check
        which worker names the heartbeat lookup reports after each update.
        """

        def beatAndCheck(api, workerName, expectedWorkers):
            # Update one worker, then compare the reported worker names
            # (count first, then position by position)
            api.updateWorkerHeartbeat(workerName)
            rows = api.getHeartbeatInfo()
            self.assertEqual(len(rows), len(expectedWorkers))
            for position, expected in enumerate(expectedWorkers):
                self.assertEqual(rows[position]['worker_name'], expected)

        component = HeartbeatAPI("testComponent")
        component.registerComponent()
        self.assertEqual(component.getHeartbeatInfo(), [])

        beatAndCheck(component, "testWorker", ["testWorker"])
        time.sleep(1)

        beatAndCheck(component, "testWorker2", ["testWorker2"])

        time.sleep(1)
        beatAndCheck(component, "testWorker", ["testWorker"])

        # Second component: the lookup now reports one row per component
        component = HeartbeatAPI("test2Component")
        component.registerComponent()
        time.sleep(1)
        beatAndCheck(component, "test2Worker", ["testWorker", "test2Worker"])

        time.sleep(1)
        beatAndCheck(component, "test2Worker2", ["testWorker", "test2Worker2"])

        time.sleep(1)
        beatAndCheck(component, "test2Worker", ["testWorker", "test2Worker"])

        # An error report must show up on the second component's row
        component.updateWorkerError("test2Worker", "Error1")
        rows = component.getHeartbeatInfo()
        self.assertEqual(rows[1]['error_message'], "Error1")
Beispiel #21
0
    def testUpdateWorkers(self):
        """
        _testUpdateWorkers_

        Register two components with workers, then exercise the cycle and
        error update methods and check the stored heartbeat rows.
        """
        first = HeartbeatAPI("testComponent1", pollInterval=60, heartbeatTimeout=600)
        first.registerComponent()
        for workerName in ("testWorker1", "testWorker2"):
            first.registerWorker(workerName)

        second = HeartbeatAPI("testComponent2", pollInterval=30, heartbeatTimeout=300)
        second.registerComponent()
        second.registerWorker("testWorker21")

        # Report one finished cycle per component
        first.updateWorkerCycle("testWorker1", 1.001, None)
        second.updateWorkerCycle("testWorker21", 1234.1, 100)
        firstRows = first.getHeartbeatInfo()
        secondRows = second.getHeartbeatInfo()

        # Only testWorker1 reported a cycle; the other worker stays at zero
        for row in firstRows:
            if row['worker_name'] != 'testWorker1':
                self.assertEqual(row["cycle_time"], 0)
            else:
                self.assertTrue(row["cycle_time"] > 1.0)
        self.assertItemsEqual([row["outcome"] for row in firstRows], [None, None])
        self.assertItemsEqual([row["error_message"] for row in firstRows], [None, None])

        # The outcome comes back as a string
        self.assertEqual(round(secondRows[0]["cycle_time"], 1), 1234.1)
        self.assertEqual(secondRows[0]["outcome"], '100')
        self.assertEqual(secondRows[0]["error_message"], None)

        # Now flag one worker with an error and look at its row only
        first.updateWorkerError("testWorker2", "BAD JOB!!!")
        firstRows = first.getHeartbeatInfo()
        for row in firstRows:
            if row['worker_name'] != 'testWorker2':
                continue
            self.assertTrue(row["last_error"] > int(time.time() - 10))
            self.assertEqual(row["state"], "Error")
            self.assertEqual(row["error_message"], "BAD JOB!!!")
Beispiel #22
0
class JobCreatorTest(unittest.TestCase):
    """
    Test case for the JobCreator

    """

    sites = ['T2_US_Florida', 'T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN']

    def setUp(self):
        """
        _setUp_

        Prepare everything the JobCreator tests rely on: logging, database
        schema, CouchDB databases, the emulator config file, the test sites
        (as WMBS locations and in resource control), a work directory and
        the component's heartbeat registration.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=[
                'WMCore.WMBS', 'WMCore.ResourceControl',
                'WMCore.Agent.Database'
            ],
            useDefault=False)

        self.couchdbname = "jobcreator_t"
        self.testInit.setupCouch("%s/jobs" % self.couchdbname, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.couchdbname, "FWJRDump")
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        # The DAO factory uses the logger and database interface attached to
        # the current thread
        currentThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)

        # Register every test site as a WMBS location first ...
        newLocation = self.daoFactory(classname="Locations.New")
        for siteName in self.sites:
            newLocation.execute(siteName=siteName, seName=siteName)

        # ... and afterwards in resource control, with a processing threshold
        self.resourceControl = ResourceControl()
        for siteName in self.sites:
            self.resourceControl.insertSite(siteName=siteName,
                                            seName=siteName,
                                            ceName=siteName)
            self.resourceControl.insertThreshold(siteName=siteName,
                                                 taskType='Processing',
                                                 maxSlots=10000,
                                                 pendingSlots=10000)

        self._setup = True
        self._teardown = False

        self.testDir = self.testInit.generateWorkDir()
        self.cwd = os.getcwd()

        # Heartbeat registration for the component under test
        self.componentName = 'JobCreator'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        return

    def tearDown(self):
        """
        _tearDown_

        Undo setUp: clear the database modules, delete the work directory,
        tear down CouchDB and delete the emulator config file.
        """
        schemaModules = [
            'WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database'
        ]
        self.testInit.clearDatabase(modules=schemaModules)
        self.testInit.delWorkDir()

        self._teardown = True

        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)

        return

    def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'):
        """
        _createJobCollection_

        Create one workflow and nSubs "Processing" subscriptions for it,
        each on its own fileset holding nFiles files placed at randomly
        chosen test sites.  Each subscription is created inside its own
        transaction on the current thread.
        """
        currentThread = threading.currentThread()

        workflow = Workflow(spec=workflowURL,
                            owner="mnorman",
                            name=name,
                            task="/TestWorkload/ReReco")
        workflow.create()

        for subIndex in range(nSubs):
            filesetName = '%s-%i' % (name, subIndex)

            currentThread.transaction.begin()

            fileset = Fileset(name=filesetName)
            fileset.create()

            for fileIndex in range(nFiles):
                # Every file goes to a randomly picked site
                chosenSite = random.choice(self.sites)
                newFile = File(lfn="/lfn/%s/%i" % (filesetName, fileIndex),
                               size=1024,
                               events=10)
                newFile.setLocation(chosenSite)
                newFile.create()
                fileset.addFile(newFile)

            fileset.commit()
            subscription = Subscription(fileset=fileset,
                                        workflow=workflow,
                                        type="Processing",
                                        split_algo="FileBased")
            subscription.create()

            currentThread.transaction.commit()

        return

    def createWorkload(self, workloadName='Test', emulator=True, priority=1):
        """
        _createTestWorkload_

        Build the test workload, attach a PresetSeeder generator to its
        ReReco task and process it with a TaskMaker under
        <testDir>/workloadTest.  Returns the workload.

        The workloadName, emulator and priority arguments are accepted but
        not used by this method.
        """
        workload = testWorkload("Tier1ReReco")

        # Fixed seeds for the generator attached to the ReReco task
        presetSeeds = {
            "generator.initialSeed": 1001,
            "evtgenproducer.initialSeed": 1001
        }
        workload.getTask("ReReco").addGenerator("PresetSeeder", **presetSeeds)

        outputDir = os.path.join(self.testDir, 'workloadTest')
        maker = TaskMaker(workload, outputDir)
        maker.skipSubscription = True
        maker.processWorkload()

        return workload

    def getConfig(self):
        """
        _getConfig_

        Build the configuration shared by the JobCreator tests: general and
        agent sections, database connection, and the JobCreator, JobMaker
        and JobStateMachine components.  Returns the configuration object.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # General settings
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())

        config.section_("Agent")
        config.Agent.componentName = self.componentName

        # Database connection, taken from the environment
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # The component under test
        config.component_("JobCreator")
        config.JobCreator.namespace = 'WMComponent.JobCreator.JobCreator'
        # Component log level (use 'SQLDEBUG' to trace the SQL statements)
        #config.JobCreator.logLevel = 'SQLDEBUG'
        config.JobCreator.logLevel = 'INFO'

        # Maximum number of threads handling messages per pool
        config.JobCreator.maxThreads = 1
        config.JobCreator.UpdateFromResourceControl = True
        config.JobCreator.pollInterval = 10
        #config.JobCreator.jobCacheDir               = self.testDir
        # Type of jobs that we run, used for resource control
        config.JobCreator.defaultJobType = 'processing'
        config.JobCreator.workerThreads = 4
        config.JobCreator.componentDir = self.testDir
        config.JobCreator.useWorkQueue = True
        config.JobCreator.WorkQueueParams = {'emulateDBSReader': True}

        # The JobMaker is called from the JobCreator
        config.component_('JobMaker')
        config.JobMaker.logLevel = 'INFO'
        config.JobMaker.namespace = 'WMCore.WMSpec.Makers.JobMaker'
        config.JobMaker.maxThreads = 1
        config.JobMaker.makeJobsHandler = 'WMCore.WMSpec.Makers.Handlers.MakeJobs'

        # JobStateMachine: CouchDB location for the job documents
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL',
                                                    'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = self.couchdbname

        return config

    def testA_VerySimpleTest(self):
        """
        _VerySimpleTest_

        Run one JobCreatorPoller cycle over freshly injected subscriptions
        and verify the created jobs in the database as well as in the job
        cache directory on disk, including the pickled job itself.
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        name = makeUUID()
        nSubs = 5
        nFiles = 10
        workloadName = 'TestWorkload'

        # Called for its effect on disk: the workload is processed under
        # <testDir>/workloadTest; the returned object is not needed here.
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name,
                                 nSubs=nSubs,
                                 nFiles=nFiles,
                                 workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        testJobCreator.algorithm()

        # FileBased splitting is expected to yield one job per file
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nFiles)

        # Every file must have been acquired by its subscription
        result = myThread.dbi.processData(
            'SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        # Find the job cache directory of the task
        testDirectory = os.path.join(self.testDir, 'jobCacheDir',
                                     'TestWorkload', 'ReReco')
        # It should have at least one job group ...
        self.assertIn('JobCollection_1_0', os.listdir(testDirectory))
        # ... but no more than twenty
        self.assertLessEqual(len(os.listdir(testDirectory)), 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # The first jobs must exist in one of the job group directories
        listOfDirs = []
        for tmpDirectory in os.listdir(testDirectory):
            listOfDirs.extend(
                os.listdir(os.path.join(testDirectory, tmpDirectory)))
        self.assertIn('job_1', listOfDirs)
        self.assertIn('job_2', listOfDirs)
        self.assertIn('job_3', listOfDirs)

        # Load one pickled job; the with-block closes the file even when
        # unpickling raises
        jobDir = os.listdir(groupDirectory)[0]
        jobFile = os.path.join(groupDirectory, jobDir, 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        with open(jobFile, 'r') as jobHandle:
            job = cPickle.load(jobHandle)

        # The seeds set by createWorkload must have reached the job baggage
        self.assertEqual(job.baggage.PresetSeeder.generator.initialSeed, 1001)
        self.assertEqual(job.baggage.PresetSeeder.evtgenproducer.initialSeed,
                         1001)

        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']),
                         'TestWorkload-Sandbox.tar.bz2')

        return

    @attr('performance')
    def testB_ProfilePoller(self):
        """
        Profile a full JobCreatorPoller cycle with cProfile.

        This is a manual profiling aid, not a regression test, so it is
        reported as skipped instead of returning early and being counted as
        a pass.  Remove the skipTest() call to run it by hand.
        """
        self.skipTest("Profiling helper; enable manually when needed")

        name = makeUUID()
        nSubs = 5
        nFiles = 1500
        workloadName = 'TestWorkload'

        # Called for its effect on disk; the returned workload is not used
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name,
                                 nSubs=nSubs,
                                 nFiles=nFiles,
                                 workflowURL=workloadPath)

        config = self.getConfig()

        # The variable must keep this name: the profiled expression below
        # looks it up through locals().
        testJobCreator = JobCreatorPoller(config=config)
        cProfile.runctx("testJobCreator.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        time.sleep(10)

        self.assertEqual(len(result), nSubs * nFiles)

        # Print the top 20% of entries ordered by cumulative time
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    def testC_ProfileWorker(self):
        """
        Profile the part of the poller where the work actually gets done.

        This is a manual profiling aid, not a regression test, so it is
        reported as skipped instead of returning early and being counted as
        a pass.  Remove the skipTest() call to run it by hand.

        NOTE(review): the body below has been unreachable for some time.  It
        reads config.JobCreator.jobCacheDir, which getConfig() only sets in
        a commented-out line, and it builds JobCreatorPoller from keyword
        arguments while the other tests pass config=...; confirm both
        before re-enabling.
        """
        self.skipTest("Profiling helper; enable manually when needed")

        name = makeUUID()
        nSubs = 5
        nFiles = 500
        workloadName = 'TestWorkload'

        # Called for its effect on disk; the returned workload is not used
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name,
                                 nSubs=nSubs,
                                 nFiles=nFiles,
                                 workflowURL=workloadPath)

        config = self.getConfig()

        configDict = {
            "couchURL": config.JobStateMachine.couchurl,
            "couchDBName": config.JobStateMachine.couchDBName,
            'jobCacheDir': config.JobCreator.jobCacheDir,
            'defaultJobType': config.JobCreator.defaultJobType
        }

        # One work unit per subscription created above.  The names "input"
        # and "testJobCreator" are referenced by the profiled expression
        # through locals(), so they must not be renamed on their own.
        input = [{"subscription": subscriptionId}
                 for subscriptionId in (1, 2, 3, 4, 5)]

        testJobCreator = JobCreatorPoller(**configDict)
        cProfile.runctx("testJobCreator.algorithm(parameters = input)",
                        globals(),
                        locals(),
                        filename="workStats.stat")

        # Print the top 20% of entries ordered by cumulative time
        p = pstats.Stats('workStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    def testD_HugeTest(self):
        """
        Run one JobCreatorPoller cycle over a very large job collection
        (10 subscriptions of 5000 files each) and print how long it took.

        The test is far too heavy for regular runs, so it is reported as
        skipped instead of returning early and being counted as a pass.
        Remove the skipTest() call to run it by hand.
        """
        self.skipTest("Large-scale test; enable manually when needed")

        myThread = threading.currentThread()

        config = self.getConfig()

        name = makeUUID()
        nSubs = 10
        nFiles = 5000
        workloadName = 'Tier1ReReco'

        # Called for its effect on disk; the returned workload is not used
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name,
                                 nSubs=nSubs,
                                 nFiles=nFiles,
                                 workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        startTime = time.time()
        testJobCreator.algorithm()
        stopTime = time.time()

        # One created job is expected per injected file
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), nSubs * nFiles)

        print("Job took %f seconds to run" % (stopTime - startTime))

        # Every file must have been acquired by its subscription
        result = myThread.dbi.processData(
            'SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        return

    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Populate WMBS with the fixtures used by the splitting test: one
        location ("s1" / "somese.cern.ch"), two filesets ("mergeFileset" and
        "bogusFileset"), one workflow built from workflowURL and name, and
        eleven files which are attached to both filesets.

        Only the subscription on "mergeFileset" is created in the database;
        the subscription on "bogusFileset" is instantiated but create() is
        never called on it here.
        """
        storageElement = "somese.cern.ch"

        newLocation = self.daoFactory(classname="Locations.New")
        newLocation.execute(siteName="s1", seName=storageElement)

        # Loaded here but not referenced again in this helper.
        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL,
                                 owner="mnorman",
                                 name=name,
                                 task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        splitAlgo = "ParentlessMergeBySize"
        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo=splitAlgo)
        mergeSubscription.create()
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo=splitAlgo)

        # One row per file: (lfn, size, events, first_event, run, lumi).
        fileSpecs = [
            ("file1", 1024, 1024, 0, 1, 45),
            ("file2", 1024, 1024, 1024, 1, 45),
            ("file3", 1024, 1024, 2048, 1, 45),
            ("file4", 1024, 1024, 3072, 1, 45),
            ("fileA", 1024, 1024, 0, 1, 46),
            ("fileB", 1024, 1024, 1024, 1, 46),
            ("fileC", 1024, 1024, 2048, 1, 46),
            ("fileI", 1024, 1024, 0, 2, 46),
            ("fileII", 1024, 1024, 1024, 2, 46),
            ("fileIII", 1024, 102400, 2048, 2, 46),
            ("fileIV", 102400, 1024, 3072, 2, 46),
        ]

        # Create every file first, in table order ...
        wmbsFiles = []
        for lfn, size, events, firstEvent, runNumber, lumi in fileSpecs:
            wmbsFile = File(lfn=lfn,
                            size=size,
                            events=events,
                            first_event=firstEvent,
                            locations=set([storageElement]))
            wmbsFile.addRun(Run(runNumber, lumi))
            wmbsFile.create()
            wmbsFiles.append(wmbsFile)

        # ... then attach each one to both filesets.
        for wmbsFile in wmbsFiles:
            mergeFileset.addFile(wmbsFile)
            bogusFileset.addFile(wmbsFile)

        mergeFileset.commit()
        bogusFileset.commit()

        return

    def testE_TestNonProxySplitting(self):
        """
        _TestNonProxySplitting_

        Switch the ReReco task to the ParentlessMergeBySize splitting
        algorithm, load the fixtures from stuffWMBS, run one JobCreator
        polling cycle with a single worker thread, and check that exactly one
        'Processing' job and no 'Merge' job ends up in the 'Created' state.
        """
        myThread = threading.currentThread()  # not referenced below

        config = self.getConfig()
        config.JobCreator.workerThreads = 1

        name = makeUUID()
        workload = self.createWorkload(workloadName='TestWorkload')

        # Override the splitting algorithm on the processing task.
        mergeParams = {
            'min_merge_size': 1,
            'max_merge_size': 100000,
            'max_merge_events': 200000,
        }
        workload.getTask("ReReco").setSplittingAlgorithm(
            "ParentlessMergeBySize", **mergeParams)

        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')
        self.stuffWMBS(workflowURL=workloadPath, name=name)

        poller = JobCreatorPoller(config=config)
        poller.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        for jobType, expected in (("Processing", 1), ("Merge", 0)):
            created = getJobsAction.execute(state='Created', jobType=jobType)
            self.assertEqual(len(created), expected)

        return
Beispiel #23
0
class JobSubmitterTest(unittest.TestCase):
    """
    _JobSubmitterTest_

    Test class for the JobSubmitterPoller
    """

    def setUp(self):
        """
        _setUp_

        Prepare logging, the database schema, the three couch databases, the
        WMBS and BossAir DAO factories, a scratch work directory and the
        heartbeat registration for the 'JobSubmitter' component.
        """
        harness = TestInit(__file__)
        self.testInit = harness
        harness.setLogging()
        harness.setDatabaseConnection()

        schemaModules = [
            "WMCore.WMBS",
            "WMCore.BossAir",
            "WMCore.ResourceControl",
            "WMCore.Agent.Database",
        ]
        harness.setSchema(customModules=schemaModules)

        couchDatabases = (
            ("jobsubmitter_t/jobs", "JobDump"),
            ("jobsubmitter_t/fwjrs", "FWJRDump"),
            ("wmagent_summary_t", "WMStats"),
        )
        for dbName, couchApp in couchDatabases:
            harness.setupCouch(dbName, couchApp)

        # Both factories share the logger and DB interface of this thread.
        thread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thread.logger,
                                     dbinterface=thread.dbi)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir",
                                       logger=thread.logger,
                                       dbinterface=thread.dbi)

        self.testDir = harness.generateWorkDir()

        # Register the component heartbeat.
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

    def tearDown(self):
        """
        _tearDown_

        Undo setUp: clear the database, delete the work directory and tear
        down the couch databases, in that order.
        """
        harness = self.testInit
        harness.clearDatabase()
        harness.delWorkDir()
        harness.tearDownCouch()

    def setResourceThresholds(self, site, **options):
        """
        _setResourceThresholds_

        Register a site in resource control together with per-task
        thresholds.

        options must provide 'pendingSlots', 'runningSlots' and 'tasks', plus
        one dict per listed task holding that task's 'pendingSlots' and
        'runningSlots'.  A truthy 'state' entry is applied as the site state.
        When no options are given at all, a default 'Normal' site with
        Processing and Merge thresholds is used.
        """
        defaults = {
            'state': 'Normal',
            'runningSlots': 10,
            'pendingSlots': 5,
            'tasks': ['Processing', 'Merge'],
            'Processing': {'pendingSlots': 5, 'runningSlots': 10},
            'Merge': {'pendingSlots': 2, 'runningSlots': 5},
        }
        settings = options if options else defaults

        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName=site,
                                   seName='se.%s' % (site),
                                   ceName=site,
                                   plugin="MockPlugin",
                                   pendingSlots=settings['pendingSlots'],
                                   runningSlots=settings['runningSlots'],
                                   cmsName=site)

        for task in settings['tasks']:
            taskLimits = settings[task]
            resourceControl.insertThreshold(
                siteName=site,
                taskType=task,
                maxSlots=taskLimits['runningSlots'],
                pendingSlots=taskLimits['pendingSlots'])

        siteState = settings.get('state')
        if siteState:
            resourceControl.changeSiteState(site, siteState)

        return

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        bl=None, wl=None, taskType='Processing', name=None):
        """
        _createJobGroups_

        Create one workflow and, for it, nSubs subscriptions each holding a
        job group with nJobs jobs.

        Parameters:
            nSubs        - number of subscriptions (and job groups) to create
            nJobs        - number of jobs requested per job group
            task         - task object handed on to makeNJobs
            workloadSpec - spec path stored on the workflow
            site         - location (or list of locations) for the files
            bl, wl       - site blacklist / whitelist stored on every job;
                           default to a fresh empty list per call
            taskType     - subscription type
            name         - workflow name; a UUID is generated when omitted

        Returns the list of created job groups.
        """
        # Fresh lists per call: a mutable default would be shared between
        # every call and every job created from it.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="mnorman",
                                name=name, task="basicWorkload/Production")
        testWorkflow.create()

        jobGroupList = []
        for _ in range(nSubs):
            # Every subscription gets its own uniquely named fileset; the
            # same identifier is used as the prefix for its job names.
            filesetName = makeUUID()

            testFileset = Fileset(name=filesetName)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            self.makeNJobs(name=filesetName, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site, bl=bl, wl=wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site, bl=None, wl=None):
        """
        _makeNJobs_

        Add nJobs new files to the fileset, then create one WMBS job for
        every file found in fileset.files.  Each job gets a cache directory
        under <testDir>/CacheDir/Sub_<sub>/Job_<index> containing a pickled
        copy of the job ('job.pkl').

        Parameters:
            name     - prefix for the file LFNs and job names
            task     - task object providing the path name and sandbox
            nJobs    - number of files to create
            jobGroup - job group the jobs are created in and added to
            fileset  - fileset receiving the new files
            sub      - subscription id used in the cache directory name
            site     - a single location or a list of locations for the files
            bl, wl   - site blacklist / whitelist stored on every job;
                       default to a fresh empty list per call

        Returns the last job and the last file created; either is None when
        nothing was created.
        """
        # Fresh lists per call instead of shared mutable defaults.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        cacheDir = os.path.join(self.testDir, 'CacheDir')
        siteList = site if isinstance(site, list) else [site]

        testFile = None
        for n in range(nJobs):
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024, events=10)
            for singleSite in siteList:
                testFile.setLocation(singleSite)
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        testJob = None
        for index, f in enumerate(fileset.files, 1):
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['priority'] = 101
            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Pickle data is binary; the context manager also guarantees the
            # handle is closed if dump() raises.
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(self):
        """
        _getConfig_

        Build the Configuration used by these tests: Agent, General,
        CoreDatabase, BossAir (with the MockPlugin), JobSubmitter and
        JobStateMachine settings.  The JobSubmitter component directory is
        created on disk before the configuration is returned.
        """
        config = Configuration()

        config.component_("Agent")
        agent = config.Agent
        agent.WMSpecDirectory = self.testDir
        agent.agentName = 'testAgent'
        agent.componentName = self.componentName
        agent.useHeartbeat = False

        # General settings
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)

        # Database connection, taken from the environment
        config.section_("CoreDatabase")
        coreDatabase = config.CoreDatabase
        coreDatabase.connectUrl = os.getenv("DATABASE")
        coreDatabase.socket = os.getenv("DBSOCK")

        # BossAir with the MockPlugin
        config.section_("BossAir")
        bossAir = config.BossAir
        bossAir.pluginNames = ['MockPlugin']
        bossAir.pluginDir = 'WMCore.BossAir.Plugins'
        bossAir.multicoreTaskTypes = ['MultiProcessing', 'MultiProduction']
        bossAir.nCondorProcesses = 1
        bossAir.section_("MockPlugin")
        bossAir.MockPlugin.fakeReport = os.path.join(
            getTestBase(), 'WMComponent_t/JobSubmitter_t', "submit.sh")

        # JobSubmitter component
        config.component_("JobSubmitter")
        submitter = config.JobSubmitter
        submitter.logLevel = 'DEBUG'
        submitter.maxThreads = 1
        submitter.pollInterval = 10
        submitter.submitScript = os.path.join(
            getTestBase(), 'WMComponent_t/JobSubmitter_t', 'submit.sh')
        submitter.componentDir = os.path.join(self.testDir, 'Components')
        submitter.workerThreads = 2
        submitter.jobsPerWorker = 200

        # JobStateMachine
        config.component_('JobStateMachine')
        stateMachine = config.JobStateMachine
        stateMachine.couchurl = os.getenv('COUCHURL')
        stateMachine.couchDBName = "jobsubmitter_t"
        stateMachine.jobSummaryDBName = 'wmagent_summary_t'

        # The component directory has to exist for the test run.
        os.makedirs(submitter.componentDir)

        return config

    def createTestWorkload(self, workloadName='Tier1ReReco'):
        """
        _createTestWorkload_

        Build the test workload called workloadName, process it with a
        TaskMaker rooted at <testDir>/workloadTest (subscription creation is
        skipped), and return the workload.
        """
        workload = testWorkload(workloadName)

        outputDir = os.path.join(self.testDir, 'workloadTest')
        maker = TaskMaker(workload, outputDir)
        maker.skipSubscription = True
        maker.processWorkload()

        return workload

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Submit jobs through the MockPlugin with thresholds that are larger
        than the number of jobs, and check that every job moves from
        'Created' to 'Executing' at the expected site: first 2 x 20
        Processing jobs, then a further 10 Merge jobs.
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        site = 'T2_US_UCSD'
        self.setResourceThresholds(site,
                                   pendingSlots=50,
                                   runningSlots=100,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 50, 'runningSlots': 100},
                                   Merge={'pendingSlots': 50, 'runningSlots': 100})

        specPath = os.path.join(self.testDir, 'workloadTest', workloadName)
        storageElement = 'se.%s' % site

        # --- Processing jobs -------------------------------------------------
        processingTotal = 2 * 20
        processingGroups = self.createJobGroups(nSubs=2, nJobs=20,
                                                task=workload.getTask("ReReco"),
                                                workloadSpec=specPath,
                                                site=storageElement)
        for group in processingGroups:
            changeState.propagate(group.jobs, 'created', 'new')

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def countJobs(state, jobType):
            """Number of jobs of jobType currently in the given state."""
            return len(getJobsAction.execute(state=state, jobType=jobType))

        # Pre-submit check: everything is waiting in 'Created'.
        self.assertEqual(countJobs('Created', "Processing"), processingTotal)

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # After one cycle every job has been submitted.
        self.assertEqual(countJobs('Created', "Processing"), 0)
        executingJobs = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(executingJobs), processingTotal)

        # Every submitted job must be located at the configured site.
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in executingJobs:
            self.assertEqual(getLocationAction.execute(jobid=jobId), [['T2_US_UCSD']])

        # A second cycle has nothing left to submit.
        jobSubmitter.algorithm()
        self.assertEqual(countJobs('Created', "Processing"), 0)
        self.assertEqual(countJobs('Executing', "Processing"), processingTotal)

        # --- Merge jobs ------------------------------------------------------
        mergeTotal = 1 * 10
        mergeGroups = self.createJobGroups(nSubs=1, nJobs=10,
                                           task=workload.getTask("ReReco"),
                                           workloadSpec=specPath,
                                           site=storageElement,
                                           taskType="Merge")
        for group in mergeGroups:
            changeState.propagate(group.jobs, 'created', 'new')

        # The new jobs are available for submission; run another cycle.
        self.assertEqual(countJobs('Created', "Merge"), mergeTotal)
        jobSubmitter.algorithm()

        # The 10 Merge jobs were submitted as well.
        self.assertEqual(countJobs('Created', "Merge"), 0)
        self.assertEqual(countJobs('Executing', "Merge"), mergeTotal)

        return

    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site.

        The scenario uses a single site (T1_US_FNAL) with 50 pending and 200
        running slots, a Processing threshold of 45 pending slots and a Merge
        threshold of 10 pending / 20 running slots.  The statements below
        build on each other's database state, so their order matters.
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        sites = ['T1_US_FNAL']

        # NOTE(review): the 'priority' entry in the Merge dict is not read by
        # setResourceThresholds as defined in this class - confirm whether it
        # is meant to have an effect.
        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 50, runningSlots = 200, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 45, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10, 'runningSlots' : 20, 'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        # 5 subscriptions x 10 jobs = 50 Processing jobs
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state,
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid = jobId)
            self.assertEqual(loc, [['T1_US_FNAL']])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Now add 10 Merge jobs. Only 5 can be submitted: with 45 Processing
        # jobs already pending, the site-wide pending threshold (50) is hit.
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 25 running 0 pending and processing is left at 215 running 0 pending
        # Processing has 135 jobs in queue and Merge 285
        # This tests all threshold dynamics including the prioritization of merge over processing
        nSubs = 1
        nJobs = 300
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        getRunJobID = self.baDaoFactory(classname = "LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname = "SetStatus")

        # Each cycle: flag every job currently in 'Executing' as 'Running' in
        # BossAir, then let the submitter poll again.
        for _ in range(5):
            result = getJobsAction.execute(state = 'Executing')
            binds = []
            for jobId in result:
                binds.append({'id' : jobId, 'retry_count' : 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')
            jobSubmitter.algorithm()

        # Final tally after the 5 cycles, per job type and WMBS state
        result = getJobsAction.execute(state = 'Executing', jobType = 'Processing')
        self.assertEqual(len(result), 215)
        result = getJobsAction.execute(state = 'Created', jobType = 'Processing')
        self.assertEqual(len(result), 135)
        result = getJobsAction.execute(state = 'Executing', jobType = 'Merge')
        self.assertEqual(len(result), 25)
        result = getJobsAction.execute(state = 'Created', jobType = 'Merge')
        self.assertEqual(len(result), 285)

        return

    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check that jobs are prioritized by job type and by oldest workflow.

        The site is limited to 10 pending slots, so each polling cycle can
        submit at most 10 jobs.  The first cycle is expected to pick the 10
        Merge jobs over the 10 Processing jobs; the following cycles are
        expected to pick Processing jobs of 'OldestWorkflow' only.
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        sites = ['T1_US_FNAL']

        # NOTE(review): the 'priority' entry in the Merge dict is not read by
        # setResourceThresholds as defined in this class - confirm whether it
        # is meant to have an effect.
        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 10, runningSlots = -1, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 50, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10, 'runningSlots' :-1, 'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        # 10 Processing jobs for 'OldestWorkflow' plus 10 Merge jobs for a
        # workflow with a generated name
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            name = 'OldestWorkflow')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        # Create Processing jobs for a newer workflow first, and afterwards
        # some more jobs for the old workflow

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            name = 'NewestWorkflow')

        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                    task = workload.getTask("ReReco"),
                                    workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                workloadName),
                                    site = 'se.%s' % 'T1_US_FNAL',
                                    name = 'OldestWorkflow'))

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Move pending jobs to running

        getRunJobID = self.baDaoFactory(classname = "LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname = "SetStatus")

        # Two cycles; each one is expected to submit 10 more Processing jobs
        for idx in range(2):
            # Flag every job currently in 'Executing' as 'Running' in BossAir
            result = getJobsAction.execute(state = 'Executing')
            binds = []
            for jobId in result:
                binds.append({'id' : jobId, 'retry_count' : 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')

            # Run again on created workflows
            jobSubmitter.algorithm()

            result = getJobsAction.execute(state = 'Created', jobType = "Merge")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
            self.assertEqual(len(result), 10)
            result = getJobsAction.execute(state = 'Created', jobType = "Processing")
            self.assertEqual(len(result), 30 - (idx + 1) * 10)
            result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
            self.assertEqual(len(result), (idx + 1) * 10)

            # Check that older workflow goes first even with newer jobs;
            # result holds the Processing jobs in 'Executing' at this point
            getWorkflowAction = self.daoFactory(classname = "Jobs.GetWorkflowTask")
            workflows = getWorkflowAction.execute(result)
            for workflow in workflows:
                self.assertEqual(workflow['name'], 'OldestWorkflow')

        return

    def testD_WhiteListBlackList(self):
        """
        _testD_WhiteListBlackList_

        Exercise the site whitelist/blacklist handling of the submitter.
        The jobCreator is trusted to have stored the lists in the job
        correctly; only the submitter's reaction to them is checked here.

        Four rounds of jobs are queued one after another against the same
        submitter instance: a blacklist leaving one site, a valid whitelist,
        an invalid whitelist and finally a blacklist covering every site.
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        nSubs = 2
        nJobs = 10
        # Number of Processing jobs queued by every round below
        jobsPerRound = nSubs * nJobs

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T2_CH_CERN', 'T3_CO_Uniandes']
        for siteName in sites:
            self.setResourceThresholds(siteName, pendingSlots = 1000, runningSlots = -1,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 5000, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 1000, 'runningSlots' :-1, 'priority' : 5})

        specPath = os.path.join(self.testDir, 'workloadTest', workloadName)

        def queueJobs(**placement):
            """Create one round of job groups and hand their jobs to changeState."""
            newGroups = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                             task = workload.getTask("ReReco"),
                                             workloadSpec = specPath,
                                             **placement)
            for jobGroup in newGroups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        # Round 1: every site except the last one is blacklisted
        queueJobs(site = 'se.%s' % sites[-1], bl = sites[:-1])

        jobSubmitter = JobSubmitterPoller(config = config)
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        executing = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(executing), jobsPerRound)

        # Every job must have been assigned to T3_CO_Uniandes
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        jobLocations = getLocationAction.execute([{'jobid' : jobId} for jobId in executing])
        for record in jobLocations:
            self.assertEqual(record['site_name'], 'T3_CO_Uniandes')

        # Round 2: whitelist a single, valid site
        queueJobs(site = 'se.%s' % 'T2_CH_CERN', wl = ['T2_CH_CERN'])
        jobSubmitter.algorithm()

        # The jobs of round 1 are still in the database
        executing = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(executing), jobsPerRound * 2)

        # Entries past the first round are expected at CERN
        jobLocations = getLocationAction.execute([{'jobid' : jobId} for jobId in executing])
        for record in jobLocations[jobsPerRound:]:
            self.assertEqual(record['site_name'], 'T2_CH_CERN')

        # Round 3: whitelist a site that does not exist.  The first two
        # rounds keep executing, the new jobs should end up in SubmitFailed.
        queueJobs(site = 'se.%s' % 'T2_CH_CERN', wl = ['T2_US_Namibia'])
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        executing = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(executing), jobsPerRound * 2)
        failed = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(failed), jobsPerRound)

        # Round 4: blacklist every site; these jobs should fail submission too
        queueJobs(site = ['se.%s' % name for name in sites], bl = sites)
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        executing = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(executing), jobsPerRound * 2)
        failed = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(failed), jobsPerRound * 2)

    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Check how the submitter reacts to the different site states
        (Draining, Down and Aborted).
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)
        nSubs = 1
        nJobs = 20
        # Processing jobs queued by each of the first two rounds
        processingTotal = nSubs * nJobs

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T3_CO_Uniandes', 'T1_US_FNAL']
        for siteName in sites:
            self.setResourceThresholds(siteName, pendingSlots = 10, runningSlots = -1,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 10, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10, 'runningSlots' :-1, 'priority' : 5})

        specPath = os.path.join(self.testDir, 'workloadTest', workloadName)

        def queueJobs(nSubscriptions, nJobsEach, **extra):
            """Create job groups whose files sit at every site and hand the jobs to changeState."""
            newGroups = self.createJobGroups(nSubs = nSubscriptions, nJobs = nJobsEach,
                                             site = ['se.%s' % name for name in sites],
                                             task = workload.getTask("ReReco"),
                                             workloadSpec = specPath,
                                             **extra)
            for jobGroup in newGroups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        myResourceControl = ResourceControl()
        myResourceControl.changeSiteState('T2_US_Florida', 'Draining')

        # Round 1: with Florida draining, jobs should go to the other sites
        queueJobs(nSubs, nJobs)
        jobSubmitter = JobSubmitterPoller(config = config)
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        executing = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(executing), processingTotal)

        # Which of FNAL, Taiwan or Uniandes gets a job is a random selection,
        # so only verify that none of them landed at the draining site
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        jobLocations = getLocationAction.execute([{'jobid' : jobId} for jobId in executing])
        for record in jobLocations:
            self.assertNotEqual(record['site_name'], 'T2_US_Florida')

        # Round 2: every site is Down, so nothing new may be submitted
        for siteName in sites:
            myResourceControl.changeSiteState(siteName, 'Down')
        queueJobs(nSubs, nJobs)
        jobSubmitter.algorithm()

        # The executing count stays at the level reached in round 1
        executing = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(executing), processingTotal)

        # Round 3: every site is Aborted and the new jobs are Merge jobs;
        # those are expected to fail submission
        for siteName in sites:
            myResourceControl.changeSiteState(siteName, 'Aborted')

        nSubsMerge = 1
        nJobsMerge = 5
        queueJobs(nSubsMerge, nJobsMerge, taskType = 'Merge')
        jobSubmitter.algorithm()

        failedMerges = getJobsAction.execute(state = 'SubmitFailed', jobType = 'Merge')
        self.assertEqual(len(failedMerges), nSubsMerge * nJobsMerge)
        executing = getJobsAction.execute(state = 'Executing', jobType = 'Processing')
        self.assertEqual(len(executing), processingTotal)

    @attr('performance')
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Submit a lot of jobs and test how long it takes for
        them to actually be submitted.

        Queues Processing and Merge job groups at T1_US_FNAL, runs a single
        submitter cycle under cProfile and prints the wall-clock time
        followed by the profile sorted by cumulative time.  Nothing is
        asserted; the output is meant to be read by a human.
        """

        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 20000, runningSlots = -1, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 10000, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10000, 'runningSlots' :-1, 'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        # NOTE: the local name must stay 'jobSubmitter'; the profiled
        # statement string below looks it up through locals().
        jobSubmitter = JobSubmitterPoller(config = config)

        workloadSpec = os.path.join(self.testDir, 'workloadTest', workloadName)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = workloadSpec,
                                            site = 'se.%s' % 'T1_US_FNAL')

        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                                 task = workload.getTask("ReReco"),
                                                 workloadSpec = workloadSpec,
                                                 site = 'se.%s' % 'T1_US_FNAL',
                                                 taskType = 'Merge'))

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Actually run it; the raw profile goes to 'testStats.stat',
        # a path relative to the current working directory
        startTime = time.time()
        cProfile.runctx("jobSubmitter.algorithm()", globals(), locals(), filename = "testStats.stat")
        stopTime = time.time()

        # Parenthesised single-argument form: prints the same text under
        # Python 2 and is valid syntax under Python 3 as well
        print("Job took %f seconds to complete" % (stopTime - startTime))

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return
Beispiel #24
0
class BossAirTest(unittest.TestCase):
    """
    Tests for the BossAir prototype

    setUp() builds the database schema, the couch setup and the
    ResourceControl sites used by the tests; tearDown() removes them again.
    """

    # Sites registered with the CondorPlugin in setUp()
    sites = ["T2_US_Florida", "T2_US_UCSD", "T2_TW_Taiwan", "T1_CH_CERN", "malpaquet"]

    def setUp(self):
        """
        setup for test.

        Installs the WMBS, BossAir, ResourceControl and Agent.Database
        schemas, registers the test sites with their thresholds, creates a
        test user, a work directory and two heartbeat components.
        """

        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"],
            useDefault=False,
        )
        self.testInit.setupCouch("bossair_t/jobs", "JobDump")
        self.testInit.setupCouch("bossair_t/fwjrs", "FWJRDump")

        self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(
                siteName=site, seName="se.%s" % (site), ceName=site, plugin="CondorPlugin", jobSlots=1000
            )
            resourceControl.insertThreshold(siteName=site, taskType="Processing", maxSlots=1000)

        # Site served by the TestPlugin; used by testB_PluginTest
        resourceControl.insertSite(siteName="Xanadu", seName="se.Xanadu", ceName="Xanadu", plugin="TestPlugin")
        resourceControl.insertThreshold(siteName="Xanadu", taskType="Processing", maxSlots=10000)

        resourceControl.insertSite(
            siteName="jade-cms.hip.fi", seName="madhatter.csc.fi", ceName="jade-cms.hip.fi", plugin="ARCPlugin"
        )
        resourceControl.insertThreshold(siteName="jade-cms.hip.fi", taskType="Processing", maxSlots=100)
        # using this for glite submissions
        resourceControl.insertSite(
            siteName="grid-ce-01.ba.infn.it",
            seName="storm-se-01.ba.infn.it",
            ceName="grid-ce-01.ba.infn.it",
            plugin="gLitePlugin",
        )
        resourceControl.insertThreshold(siteName="grid-ce-01.ba.infn.it", taskType="Processing", maxSlots=50)

        # Create user
        newuser = self.daoFactory(classname="Users.New")
        newuser.execute(dn="moron")

        # We actually need the user name
        self.user = getpass.getuser()

        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        componentName = "test"
        self.heartbeatAPI = HeartbeatAPI(componentName)
        self.heartbeatAPI.registerComponent()
        componentName = "JobTracker"
        self.heartbeatAPI2 = HeartbeatAPI(componentName)
        self.heartbeatAPI2.registerComponent()

        return

    def tearDown(self):
        """
        Database deletion

        Clears the schemas installed by setUp(), then removes the work
        directory and tears down the couch setup.
        """
        self.testInit.clearDatabase(
            modules=["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"]
        )

        self.testInit.delWorkDir()

        self.testInit.tearDownCouch()

        return

    def getConfig(self):
        """
        _getConfig_

        Build a basic BossAir config

        Database and couch locations are read from the DATABASE, DBSOCK and
        COUCHURL environment variables.  Returns the Configuration object.
        """

        config = Configuration()

        config.section_("Agent")
        config.Agent.agentName = "testAgent"
        config.Agent.componentName = "test"
        config.Agent.useHeartbeat = False

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.section_("BossAir")
        config.BossAir.pluginNames = ["TestPlugin", "CondorPlugin"]
        config.BossAir.pluginDir = "WMCore.BossAir.Plugins"

        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = "INFO"
        config.JobSubmitter.pollInterval = 1
        config.JobSubmitter.pluginName = "AirPlugin"
        config.JobSubmitter.pluginDir = "JobSubmitter.Plugins"
        config.JobSubmitter.submitDir = os.path.join(self.testDir, "submit")
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME", "badtest.fnal.gov")
        config.JobSubmitter.submitScript = os.path.join(
            WMCore.WMInit.getWMBASE(), "test/python/WMComponent_t/JobSubmitter_t", "submit.sh"
        )
        config.JobSubmitter.componentDir = os.path.join(os.getcwd(), "Components")
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.gLiteConf = os.path.join(os.getcwd(), "config.cfg")

        # JobTracker
        config.component_("JobTracker")
        config.JobTracker.logLevel = "INFO"
        config.JobTracker.pollInterval = 1

        # JobStateMachine
        config.component_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL")
        config.JobStateMachine.couchDBName = "bossair_t"

        # JobStatusLite
        config.component_("JobStatusLite")
        config.JobStatusLite.componentDir = os.path.join(os.getcwd(), "Components")
        config.JobStatusLite.stateTimeouts = {"Pending": 10, "Running": 86400}
        config.JobStatusLite.pollInterval = 1

        return config

    def createTestWorkload(self, workloadName="Test", emulator=True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.

        The workload is processed by a TaskMaker into the "workloadTest"
        directory below the test work directory and then saved under
        workloadName.  The emulator argument is not used; it is kept so
        existing callers keep working.  Returns the workload.
        """

        workload = testWorkload("Tier1ReReco")

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, "workloadTest"))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.save(workloadName)

        return workload

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=None, wl=None):
        """
        Creates a series of jobGroups for submissions

        One workflow is created, and for each of the nSubs subscriptions a
        fileset, a Processing subscription and a job group holding nJobs
        jobs.  site, bl (site blacklist) and wl (site whitelist) are passed
        on to makeNJobs(); bl and wl default to empty lists.

        Returns the list of created job groups.
        """

        jobGroupList = []

        testWorkflow = Workflow(spec=workloadSpec, owner="mnorman", name=makeUUID(), task="basicWorkload/Production")
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(
                fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
            )
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(
                name=name,
                task=task,
                nJobs=nJobs,
                jobGroup=testJobGroup,
                fileset=testFileset,
                sub=testSubscription.exists(),
                site=site,
                bl=bl,
                wl=wl,
            )

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site=None, bl=None, wl=None):
        """
        _makeNJobs_

        Make nJobs WMBS files in the fileset and one WMBS job per file of
        the fileset, including all the add-ons a job needs for submission.

        Files are located at site when given, otherwise at the storage
        element of every entry of self.sites.  Each job gets its own cache
        directory below <testDir>/CacheDir, where it is pickled as job.pkl.

        Returns a tuple (last job created, last file created); an element
        is None when nothing of that kind was created.
        """
        # Fresh lists per call instead of shared mutable default arguments
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, "CacheDir")

        # Pre-bind the return values so an empty run does not raise
        testJob = None
        testFile = None

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n), size=1024, events=10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation("se.%s" % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        # Job indices start at 1
        for index, f in enumerate(fileset.files, 1):
            testJob = Job(name="%s-%i" % (name, index))
            testJob.addFile(f)
            location = f.getLocations()[0]
            testJob["location"] = location
            testJob["custom"]["location"] = location
            testJob["task"] = task.getPathName()
            testJob["sandbox"] = task.data.input.sandbox
            testJob["spec"] = os.path.join(self.testDir, "basicWorkload.pcl")
            testJob["mask"]["FirstEvent"] = 101
            testJob["owner"] = "mnorman"
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob["ownerDN"] = "mnorman"
            jobCache = os.path.join(cacheDir, "Sub_%i" % (sub), "Job_%i" % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob["cache_dir"] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Pickle data is binary; the with-block closes the file even
            # when dumping fails
            with open(os.path.join(jobCache, "job.pkl"), "wb") as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def createDummyJobs(self, nJobs, location=None):
        """
        _createDummyJobs_

        Create some dummy jobs

        Builds one workflow, fileset, subscription and job group and
        creates nJobs jobs in that group.  location defaults to the first
        entry of self.sites.  Returns the list of created jobs.
        """

        if not location:
            location = self.sites[0]

        nameStr = makeUUID()

        testWorkflow = Workflow(spec=nameStr, owner="mnorman", name=nameStr, task="basicWorkload/Production")
        testWorkflow.create()

        testFileset = Fileset(name=nameStr)
        testFileset.create()

        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
        )
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        jobList = []

        for i in range(nJobs):
            testJob = Job(name="%s-%i" % (nameStr, i))
            testJob["location"] = location
            testJob["custom"]["location"] = location
            testJob["userdn"] = "moron"
            testJob["owner"] = "moron"
            testJob.create(testJobGroup)
            jobList.append(testJob)

        return jobList

    @attr("integration")
    def testA_APITest(self):
        """
        _APITest_

        This is a commissioning test that has very little to do
        with anything except loading the code.

        Creates, loads, updates and deletes jobs directly through the
        BossAirAPI.
        """

        myThread = threading.currentThread()

        config = self.getConfig()

        baAPI = BossAirAPI(config=config)

        # We should have loaded a plugin
        self.assertTrue("TestPlugin" in baAPI.plugins.keys())

        result = myThread.dbi.processData("SELECT name FROM bl_status")[0].fetchall()
        statusList = [row.values()[0] for row in result]

        # We should have the plugin states in the database.  Check by
        # membership: list.sort() returns None, so comparing the results of
        # two sort() calls would pass no matter what the table contains.
        # NOTE(review): other loaded plugins presumably add states of their
        # own, hence no exact comparison of the whole list.
        for state in ["New", "Dead", "Gone"]:
            self.assertTrue(state in statusList)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs)

        baAPI.createNewJobs(wmbsJobs=jobDummies)

        runningJobs = baAPI._listRunJobs()

        self.assertEqual(len(runningJobs), nJobs)

        newJobs = baAPI._loadByStatus(status="New")
        self.assertEqual(len(newJobs), nJobs)
        deadJobs = baAPI._loadByStatus(status="Dead")
        self.assertEqual(len(deadJobs), 0)

        # It should raise an error if we try loading a
        # non-existent status
        self.assertRaises(BossAirException, baAPI._loadByStatus, status="FalseStatus")

        # Change the job status and update it
        for job in newJobs:
            job["status"] = "Dead"

        baAPI._updateJobs(jobs=newJobs)

        # Test whether we see the job status as updated
        newJobs = baAPI._loadByStatus(status="New")
        self.assertEqual(len(newJobs), 0)
        deadJobs = baAPI._loadByStatus(status="Dead")
        self.assertEqual(len(deadJobs), nJobs)

        # Can we load by BossAir ID?
        loadedJobs = baAPI._loadByID(jobs=deadJobs)
        self.assertEqual(len(loadedJobs), nJobs)

        # Can we load via WMBS?
        loadedJobs = baAPI.loadByWMBS(wmbsJobs=jobDummies)
        self.assertEqual(len(loadedJobs), nJobs)

        # See if we can delete jobs
        baAPI._deleteJobs(jobs=deadJobs)

        # Confirm that they're gone
        deadJobs = baAPI._loadByStatus(status="Dead")
        self.assertEqual(len(deadJobs), 0)

        self.assertEqual(len(baAPI.jobs), 0)

        return

    @attr("integration")
    def testB_PluginTest(self):
        """
        _PluginTest_

        Now check that these functions worked if called through plugins
        instead of directly.

        Jobs located at Xanadu are submitted with the TestPlugin, tracked
        until they are complete and finally removed again.
        """

        myThread = threading.currentThread()

        config = self.getConfig()

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs, location="Xanadu")
        changeState = ChangeState(config)
        changeState.propagate(jobDummies, "created", "new")
        changeState.propagate(jobDummies, "executing", "created")

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job["plugin"] = "TestPlugin"
            job["owner"] = "mnorman"

        baAPI.submit(jobs=jobDummies)

        newJobs = baAPI._loadByStatus(status="New")
        self.assertEqual(len(newJobs), nJobs)

        # Every submitted job should be listed as running at this point
        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), nJobs)

        # Test Plugin should complete all jobs
        baAPI.track()

        # Should be no more running jobs
        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), 0)

        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nJobs)

        # Query the table directly because BossAir is specifically built
        # to keep it from finding completed jobs
        result = myThread.dbi.processData("SELECT id FROM bl_runjob")[0].fetchall()
        self.assertEqual(len(result), nJobs)

        baAPI.removeComplete(jobs=jobDummies)

        result = myThread.dbi.processData("SELECT id FROM bl_runjob")[0].fetchall()
        self.assertEqual(len(result), 0)

        return

    def testG_monitoringDAO(self):
        """
        _monitoringDAO_

        Because I need a test for the monitoring DAO

        NOTE: this test is switched off by the return statement right
        below; it always passes without running any of its body.
        """

        # Disabled on purpose: nothing after this line is executed.
        return

        # NOTE(review): when re-enabling, check the expectation at the end;
        # the jobs are given the TestPlugin but CondorPlugin is asserted.
        myThread = threading.currentThread()

        config = self.getConfig()

        changeState = ChangeState(config)

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs)

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job["plugin"] = "TestPlugin"
            job["owner"] = "mnorman"
            job["location"] = "T2_US_UCSD"
            job.save()

        baAPI.submit(jobs=jobDummies)

        results = baAPI.monitor()

        self.assertEqual(len(results), nJobs)
        for job in results:
            self.assertEqual(job["plugin"], "CondorPlugin")

        return
Beispiel #25
0
class BossAirTest(unittest.TestCase):
    """
    Tests for the BossAir prototype

    """

    sites = ['T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN', 'T2_US_Florida']

    def setUp(self):
        """
        _setUp_

        Prepare everything the BossAir tests rely on: logging and database
        connection, schema, couch databases, resource control sites and
        thresholds, a WMBS user, a work directory and two heartbeat
        registrations.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # Run the teardown steps (work dir removal, couch teardown) before
        # the schema is installed.
        self.tearDown()
        schemaModules = [
            "WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl",
            "WMCore.Agent.Database"
        ]
        self.testInit.setSchema(customModules=schemaModules, useDefault=False)
        for couchName, couchApp in (("bossair_t/jobs", "JobDump"),
                                    ("bossair_t/fwjrs", "FWJRDump")):
            self.testInit.setupCouch(couchName, couchApp)

        currentThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site,
                                       pnn='se.%s' % site,
                                       cmsName=site,
                                       ceName=site,
                                       plugin="CondorPlugin",
                                       pendingSlots=1000,
                                       runningSlots=2000)
            resourceControl.insertThreshold(siteName=site,
                                            taskType='Processing',
                                            maxSlots=1000,
                                            pendingSlots=1000)

        # Extra sites as (site name, pnn, plugin, slots).  Their cmsName is
        # the value `site` still holds from the loop above, i.e. the last
        # entry of self.sites.
        extraSites = (
            ('Xanadu', 'se.Xanadu', "TestPlugin", 10000),
            ('jade-cms.hip.fi', 'madhatter.csc.fi', "ARCPlugin", 100),
            # using this for glite submissions
            ('grid-ce-01.ba.infn.it', 'storm-se-01.ba.infn.it',
             'gLitePlugin', 50),
        )
        for extraName, extraPnn, extraPlugin, slots in extraSites:
            resourceControl.insertSite(siteName=extraName,
                                       pnn=extraPnn,
                                       cmsName=site,
                                       ceName=extraName,
                                       plugin=extraPlugin)
            resourceControl.insertThreshold(siteName=extraName,
                                            taskType='Processing',
                                            maxSlots=slots,
                                            pendingSlots=slots)

        # Create user
        userAction = self.daoFactory(classname="Users.New")
        userAction.execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        # Working dir used to keep track of error and log files from condor
        self.testDir = self.testInit.generateWorkDir()

        # Register heartbeats for both component names
        self.heartbeatAPI = HeartbeatAPI('test')
        self.heartbeatAPI.registerComponent()
        self.heartbeatAPI2 = HeartbeatAPI('JobTracker')
        self.heartbeatAPI2.registerComponent()

    def tearDown(self):
        """
        _tearDown_

        Call delWorkDir() and tearDownCouch() on the test init helper.  The
        clearDatabase() call is commented out, so it is not executed here.
        """
        # self.testInit.clearDatabase(modules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"])
        testInit = self.testInit
        testInit.delWorkDir()
        testInit.tearDownCouch()

    def getConfig(self):
        """
        _getConfig_

        Assemble the configuration used by the BossAir tests: Agent and
        CoreDatabase sections plus the BossAir, JobSubmitter, JobTracker,
        JobStateMachine and JobStatusLite settings.

        Returns the populated configuration object.
        """
        config = self.testInit.getConfiguration()

        # Paths shared by several settings below
        workingDir = os.getcwd()
        componentDir = os.path.join(workingDir, 'Components')
        submitScript = os.path.join(
            WMCore.WMInit.getWMBASE(),
            'test/python/WMComponent_t/JobSubmitter_t', 'submit.sh')

        # Agent
        config.section_("Agent")
        config.Agent.agentName = 'testAgent'
        config.Agent.componentName = 'test'
        config.Agent.useHeartbeat = False

        # CoreDatabase: connection details come from the environment
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir
        config.section_("BossAir")
        config.BossAir.pluginNames = ['TestPlugin', 'CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh'

        # JobSubmitter
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'INFO'
        config.JobSubmitter.pollInterval = 1
        config.JobSubmitter.pluginName = 'AirPlugin'
        config.JobSubmitter.pluginDir = 'JobSubmitter.Plugins'
        config.JobSubmitter.submitDir = os.path.join(self.testDir, 'submit')
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME",
                                                   'stevia.hep.wisc.edu')
        config.JobSubmitter.submitScript = submitScript
        config.JobSubmitter.componentDir = componentDir
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.gLiteConf = os.path.join(workingDir, 'config.cfg')

        # JobTracker
        config.component_("JobTracker")
        config.JobTracker.logLevel = 'INFO'
        config.JobTracker.pollInterval = 1

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "bossair_t"

        # JobStatusLite
        config.component_('JobStatusLite')
        config.JobStatusLite.componentDir = componentDir
        config.JobStatusLite.stateTimeouts = {'Pending': 10, 'Running': 86400}
        config.JobStatusLite.pollInterval = 1

        return config

    def createTestWorkload(self, workloadName='Test', emulator=True):
        """
        _createTestWorkload_

        Build the "Tier1ReReco" test workload, run a TaskMaker over it inside
        the test work directory and save it under workloadName.

        The emulator argument is accepted but not used here.  Returns the
        workload.
        """
        testLoad = testWorkload("Tier1ReReco")
        # The task is looked up but not used any further in this helper
        rerecoTask = testLoad.getTask("ReReco")

        makerDir = os.path.join(self.testDir, 'workloadTest')
        maker = TaskMaker(testLoad, makerDir)
        maker.skipSubscription = True
        maker.processWorkload()

        testLoad.save(workloadName)
        return testLoad

    def createJobGroups(self,
                        nSubs,
                        nJobs,
                        task,
                        workloadSpec,
                        site=None,
                        bl=None,
                        wl=None):
        """
        Creates a series of jobGroups for submissions

        One workflow is created, then nSubs times a fileset, a subscription
        and a job group, each filled through makeNJobs().

        :param nSubs: number of subscriptions (and job groups) to create
        :param nJobs: number of jobs per job group
        :param task: task object handed on to makeNJobs()
        :param workloadSpec: value used as the workflow spec
        :param site: optional location handed on to makeNJobs()
        :param bl: site blacklist handed on to makeNJobs() (default: empty)
        :param wl: site whitelist handed on to makeNJobs() (default: empty)
        :returns: list of the created job groups
        """
        # Fresh lists per call: a literal [] default would be one shared
        # object that ends up attached to the jobs of every call.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        jobGroupList = []

        testWorkflow = Workflow(spec=workloadSpec,
                                owner="tapas",
                                name=makeUUID(),
                                task="basicWorkload/Production",
                                owner_vogroup='phgroup',
                                owner_vorole='cmsrole')
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name,
                           task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site,
                           bl=bl,
                           wl=wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self,
                  name,
                  task,
                  nJobs,
                  jobGroup,
                  fileset,
                  sub,
                  site=None,
                  bl=None,
                  wl=None):
        """
        _makeNJobs_

        Add nJobs files to the fileset, then create one WMBS job per file of
        the fileset and write a pickled copy of each job into its cache
        directory below <testDir>/CacheDir.

        :param name: prefix of the file LFNs and of the job names
        :param task: task object providing getPathName() and the sandbox
        :param nJobs: number of files to add to the fileset
        :param jobGroup: job group the jobs are created in and added to
        :param fileset: fileset that receives the files
        :param sub: integer used for the 'Sub_<n>' cache directory name
        :param site: location set on every file; when empty, every file is
                     located at 'se.<site>' for each entry of self.sites
        :param bl: site blacklist stored on every job (default: empty list)
        :param wl: site whitelist stored on every job (default: empty list)
        :returns: tuple (last job created, last file created); an element is
                  None when nothing of that kind was created
        """
        # Fresh lists per call: the lists are stored on the jobs, so a
        # literal [] default would be shared by the jobs of all calls.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        testJob = None
        testFile = None

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024,
                            events=10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation('se.%s' % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        for index, f in enumerate(fileset.files, 1):
            location = f.getLocations()[0]

            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = location
            testJob['custom']['location'] = location
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['owner'] = 'tapas'
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['ownerDN'] = 'tapas'
            testJob['ownerRole'] = 'cmsrole'
            testJob['ownerGroup'] = 'phgroup'

            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub),
                                    'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Pickle data is binary; the context manager closes the file even
            # when dumping fails.
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def createDummyJobs(self, nJobs, location=None):
        """
        _createDummyJobs_

        Create a workflow, fileset, subscription and job group that all carry
        one fresh UUID as their name, then create nJobs jobs in that group.

        location falls back to the first entry of self.sites when empty.
        Returns the list of created jobs.
        """
        location = location or self.sites[0]
        uuid = makeUUID()

        workflow = Workflow(spec=uuid,
                            owner="tapas",
                            name=uuid,
                            task="basicWorkload/Production",
                            owner_vogroup='phgroup',
                            owner_vorole='cmsrole')
        workflow.create()

        fileset = Fileset(name=uuid)
        fileset.create()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    type="Processing",
                                    split_algo="FileBased")
        subscription.create()

        group = JobGroup(subscription=subscription)
        group.create()

        def buildJob(number):
            """Create job <uuid>-<number> in the job group and return it."""
            dummy = Job(name='%s-%i' % (uuid, number))
            dummy['location'] = location
            dummy['custom']['location'] = location
            dummy['userdn'] = 'tapas'
            dummy['owner'] = 'tapas'
            dummy['userrole'] = 'cmsrole'
            dummy['usergroup'] = 'phgroup'
            dummy.create(group)
            return dummy

        return [buildJob(number) for number in range(nJobs)]

    @attr('integration')
    def testA_APITest(self):
        """
        _APITest_

        Commissioning test for the BossAirAPI bookkeeping calls: plugin
        loading, job creation, loading by status / BossAir ID / WMBS job,
        status updates and deletion.
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        baAPI = BossAirAPI(config=config)

        # We should have loaded a plugin
        self.assertIn('TestPlugin', baAPI.plugins.keys())

        result = myThread.dbi.processData(
            "SELECT name FROM bl_status")[0].fetchall()
        statusList = [list(row.values())[0] for row in result]

        # We should have the plugin states in the database.
        # The former check compared the results of two list.sort() calls,
        # which are both None, so it could never fail.  Only membership is
        # checked: the config also loads CondorPlugin, which presumably
        # registers further states.
        for state in ('New', 'Dead', 'Gone'):
            self.assertIn(state, statusList)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs)
        print(jobDummies)

        baAPI.createNewJobs(wmbsJobs=jobDummies)

        runningJobs = baAPI._listRunJobs()

        self.assertEqual(len(runningJobs), nJobs)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), nJobs)
        deadJobs = baAPI._loadByStatus(status='Dead')
        self.assertEqual(len(deadJobs), 0)

        # An unknown status must be rejected
        self.assertRaises(BossAirException,
                          baAPI._loadByStatus,
                          status='FalseStatus')

        # Change the job status and update it
        for job in newJobs:
            job['status'] = 'Dead'

        baAPI._updateJobs(jobs=newJobs)

        # Test whether we see the job status as updated
        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), 0)
        deadJobs = baAPI._loadByStatus(status='Dead')
        self.assertEqual(len(deadJobs), nJobs)

        # Can we load by BossAir ID?
        loadedJobs = baAPI._loadByID(jobs=deadJobs)
        self.assertEqual(len(loadedJobs), nJobs)

        # Can we load via WMBS?
        loadedJobs = baAPI.loadByWMBS(wmbsJobs=jobDummies)
        self.assertEqual(len(loadedJobs), nJobs)

        # See if we can delete jobs
        baAPI._deleteJobs(jobs=deadJobs)

        # Confirm that they're gone
        deadJobs = baAPI._loadByStatus(status='Dead')
        self.assertEqual(len(deadJobs), 0)

        self.assertEqual(len(baAPI.jobs), 0)

        return

    @attr('integration')
    def testB_PluginTest(self):
        """
        _PluginTest_

        Run jobs through the plugin-facing calls of BossAirAPI (submit,
        track, getComplete, removeComplete) using the TestPlugin at site
        'Xanadu', and check the bookkeeping after every step.
        """
        jobCount = 10

        config = self.getConfig()
        baAPI = BossAirAPI(config=config)

        # Create the jobs and walk them through the state machine
        jobDummies = self.createDummyJobs(nJobs=jobCount, location='Xanadu')
        stateChanger = ChangeState(config)
        stateChanger.propagate(jobDummies, 'created', 'new')
        stateChanger.propagate(jobDummies, 'executing', 'created')

        # Every job needs a plugin and an owner before it is submitted
        for dummy in jobDummies:
            dummy['plugin'] = 'TestPlugin'
            dummy['owner'] = 'tapas'

        baAPI.submit(jobs=jobDummies)

        self.assertEqual(len(baAPI._loadByStatus(status='New')), jobCount)

        # All submitted jobs are still listed as running
        self.assertEqual(len(baAPI._listRunJobs()), jobCount)

        # Test Plugin should complete all jobs
        baAPI.track()

        # Should be no more running jobs
        self.assertEqual(len(baAPI._listRunJobs()), 0)

        # Check if they're complete
        self.assertEqual(len(baAPI.getComplete()), jobCount)

        # Query the table directly because BossAir is specifically built
        # to keep it from finding completed jobs
        dbi = threading.currentThread().dbi
        query = "SELECT id FROM bl_runjob"
        rows = dbi.processData(query)[0].fetchall()
        self.assertEqual(len(rows), jobCount)

        baAPI.removeComplete(jobs=jobDummies)

        rows = dbi.processData(query)[0].fetchall()
        self.assertEqual(len(rows), 0)

    def testG_monitoringDAO(self):
        """
        _monitoringDAO_

        Exercise the monitoring DAO through BossAirAPI.monitor().

        The test is disabled.  It used to leave through a bare ``return``,
        which reported a pass without checking anything; it is now reported
        as skipped.  The statements below are kept as the blueprint for
        re-enabling it.
        """

        self.skipTest("monitoring DAO test is disabled")

        config = self.getConfig()

        # Not referenced below; kept because constructing ChangeState may
        # perform setup work -- TODO confirm before removing.
        changeState = ChangeState(config)

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs)

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job['plugin'] = 'TestPlugin'
            job['owner'] = 'tapas'
            job['location'] = 'T2_US_UCSD'
            job.save()

        baAPI.submit(jobs=jobDummies)

        results = baAPI.monitor()

        self.assertEqual(len(results), nJobs)
        # NOTE(review): the jobs are submitted with TestPlugin but the check
        # expects CondorPlugin -- confirm which one monitor() should report.
        for job in results:
            self.assertEqual(job['plugin'], 'CondorPlugin')

        return
class JobCreatorTest(unittest.TestCase):
    """
    Test case for the JobCreator

    """

    sites = ['T2_US_Florida', 'T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN']

    def setUp(self):
        """
        _setUp_

        Open the logging and database connections, install the WMBS,
        ResourceControl and Agent.Database schema, set up the couch
        databases, register the dummy locations and sites, create a work
        directory and register the component heartbeat.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        schemaModules = ['WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database']
        self.testInit.setSchema(customModules=schemaModules, useDefault=False)

        self.couchdbname = "jobcreator_t"
        for suffix, couchApp in (("jobs", "JobDump"), ("fwjrs", "FWJRDump")):
            self.testInit.setupCouch("%s/%s" % (self.couchdbname, suffix), couchApp)
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        currentThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=currentThread.logger,
                                     dbinterface=currentThread.dbi)

        # Register every site as a WMBS location first ...
        newLocation = self.daoFactory(classname="Locations.New")
        for siteName in self.sites:
            newLocation.execute(siteName=siteName, pnn=siteName)

        # ... then as a resource control site with a Processing threshold
        self.resourceControl = ResourceControl()
        for siteName in self.sites:
            self.resourceControl.insertSite(siteName=siteName, pnn=siteName, ceName=siteName)
            self.resourceControl.insertThreshold(siteName=siteName, taskType='Processing',
                                                 maxSlots=10000, pendingSlots=10000)

        self._setup = True
        self._teardown = False

        self.testDir = self.testInit.generateWorkDir()
        self.cwd = os.getcwd()

        # Set heartbeat
        self.componentName = 'JobCreator'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

    def tearDown(self):
        """
        _tearDown_

        Clear the WMBS, ResourceControl and Agent.Database modules from the
        database, delete the work directory, tear down couch and delete the
        emulator config file.
        """
        schemaModules = ['WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database']
        self.testInit.clearDatabase(modules=schemaModules)
        self.testInit.delWorkDir()

        self._teardown = True

        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)

    def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'):
        """
        _createJobCollection_

        Create one workflow and nSubs subscriptions on it.  Every
        subscription gets its own fileset holding nFiles files, each located
        at a randomly chosen entry of self.sites.  The work for one
        subscription runs inside one transaction of the current thread.
        """
        transaction = threading.currentThread().transaction

        workflow = Workflow(spec=workflowURL, owner="mnorman",
                            name=name, task="/TestWorkload/ReReco")
        workflow.create()

        for subIndex in range(nSubs):
            filesetName = '%s-%i' % (name, subIndex)

            transaction.begin()

            fileset = Fileset(name=filesetName)
            fileset.create()

            for fileIndex in range(nFiles):
                # pick a random site
                location = random.choice(self.sites)
                newFile = File(lfn="/lfn/%s/%i" % (filesetName, fileIndex), size=1024, events=10)
                newFile.setLocation(location)
                newFile.create()
                fileset.addFile(newFile)

            fileset.commit()
            subscription = Subscription(fileset=fileset,
                                        workflow=workflow,
                                        type="Processing",
                                        split_algo="FileBased")
            subscription.create()

            transaction.commit()

    def createWorkload(self, workloadName='Test', emulator=True, priority=1):
        """
        _createTestWorkload_

        Build the "Tier1ReReco" test workload, attach a PresetSeeder
        generator with fixed seeds to its ReReco task and run a TaskMaker
        over it inside <testDir>/workloadTest.

        None of the arguments is used here.  Returns the workload.
        """
        testLoad = testWorkload("Tier1ReReco")

        rerecoTask = testLoad.getTask("ReReco")
        # The keys contain dots, so they have to be passed as an unpacked dict
        rerecoTask.addGenerator("PresetSeeder",
                                **{"generator.initialSeed": 1001,
                                   "evtgenproducer.initialSeed": 1001})

        makerDir = os.path.join(self.testDir, 'workloadTest')
        maker = TaskMaker(testLoad, makerDir)
        maker.skipSubscription = True
        maker.processWorkload()

        return testLoad

    def getConfig(self):
        """
        _getConfig_

        Build the configuration shared by the JobCreator tests: General,
        Agent and CoreDatabase sections plus the JobCreator, JobMaker and
        JobStateMachine components.

        Returns the populated configuration object.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # General settings
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())

        config.section_("Agent")
        config.Agent.componentName = self.componentName

        # CoreDatabase: dialect, dburl, etc. are taken from the environment
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobCreator component
        config.component_("JobCreator")
        config.JobCreator.namespace = 'WMComponent.JobCreator.JobCreator'
        # Log level of the component ('SQLDEBUG' is the verbose alternative)
        config.JobCreator.logLevel = 'INFO'
        # Maximum number of threads dealing with messages per pool
        config.JobCreator.maxThreads = 1
        config.JobCreator.UpdateFromResourceControl = True
        config.JobCreator.pollInterval = 10
        # config.JobCreator.jobCacheDir is not set here
        # Type of jobs that we run, used for resource control
        config.JobCreator.defaultJobType = 'processing'
        config.JobCreator.workerThreads = 4
        config.JobCreator.componentDir = self.testDir
        config.JobCreator.useWorkQueue = True
        config.JobCreator.WorkQueueParams = {'emulateDBSReader': True}

        # The JobMaker is called from here as well
        config.component_('JobMaker')
        config.JobMaker.logLevel = 'INFO'
        config.JobMaker.namespace = 'WMCore.WMSpec.Makers.JobMaker'
        config.JobMaker.maxThreads = 1
        config.JobMaker.makeJobsHandler = 'WMCore.WMSpec.Makers.Handlers.MakeJobs'

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', 'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = self.couchdbname

        return config

    def testVerySimpleTest(self):
        """
        _VerySimpleTest_

        Run the JobCreatorPoller once over 5 subscriptions of 10 files each
        and check the created jobs, the acquired files, the layout of the job
        cache directory and the content of one pickled job.
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        name = makeUUID()
        nSubs = 5
        nFiles = 10
        workloadName = 'TestWorkload'

        # Only the files written by createWorkload() are needed here
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), nSubs * nFiles)

        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'jobCacheDir', 'TestWorkload', 'ReReco')
        collectionDirs = os.listdir(testDirectory)
        # It should have at least one jobGroup
        self.assertIn('JobCollection_1_0', collectionDirs)
        # But no more than twenty
        self.assertLessEqual(len(collectionDirs), 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # The first jobs should be somewhere below the collection directories
        listOfDirs = []
        for tmpDirectory in collectionDirs:
            listOfDirs.extend(os.listdir(os.path.join(testDirectory, tmpDirectory)))
        for jobDirName in ('job_1', 'job_2', 'job_3'):
            self.assertIn(jobDirName, listOfDirs)

        jobDir = os.listdir(groupDirectory)[0]
        jobFile = os.path.join(groupDirectory, jobDir, 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        # Pickle data is binary; the context manager closes the file even
        # when loading fails.
        with open(jobFile, 'rb') as handle:
            job = pickle.load(handle)

        self.assertEqual(job.baggage.PresetSeeder.generator.initialSeed, 1001)
        self.assertEqual(job.baggage.PresetSeeder.evtgenproducer.initialSeed, 1001)

        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')

        return

    @attr('performance', 'integration')
    def testProfilePoller(self):
        """
        Profile one algorithm() call of the JobCreatorPoller over 5
        subscriptions of 1500 files each and print the statistics.

        Not meant for regular runs; besides the job count nothing is checked.
        """
        myThread = threading.currentThread()

        statsFile = "testStats.stat"
        name = makeUUID()
        nSubs = 5
        nFiles = 1500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')
        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        config = self.getConfig()

        # The profiled statement looks up testJobCreator through locals()
        testJobCreator = JobCreatorPoller(config=config)
        cProfile.runctx("testJobCreator.algorithm()", globals(), locals(), filename=statsFile)

        createdJobs = self.daoFactory(classname="Jobs.GetAllJobs").execute(state='Created',
                                                                           jobType="Processing")

        time.sleep(10)

        self.assertEqual(len(createdJobs), nSubs * nFiles)

        # Print the top 20% of the entries, sorted by cumulative time
        pstats.Stats(statsFile).sort_stats('cumulative').print_stats(.2)

    @attr('integration')
    def testProfileWorker(self):
        """
        Profile where the work actually gets done.

        Not meant for regular runs: nothing is asserted, the test only
        profiles one algorithm() call and prints the statistics.
        """
        name = makeUUID()
        nSubs = 5
        nFiles = 500
        workloadName = 'TestWorkload'

        # Only the files written by createWorkload() are needed here
        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        config = self.getConfig()

        # NOTE(review): getConfig() of this class leaves jobCacheDir
        # commented out, so this lookup may need updating -- confirm.
        configDict = {"couchURL": config.JobStateMachine.couchurl,
                      "couchDBName": config.JobStateMachine.couchDBName,
                      'jobCacheDir': config.JobCreator.jobCacheDir,
                      'defaultJobType': config.JobCreator.defaultJobType}

        # One entry per subscription created above (ids 1..nSubs)
        subs = [{"subscription": subId} for subId in range(1, nSubs + 1)]

        testJobCreator = JobCreatorPoller(**configDict)
        # The statement is evaluated against locals(); it has to name the
        # `subs` list.  It used to say `input`, which resolved to the builtin
        # function instead of the subscription list.
        cProfile.runctx("testJobCreator.algorithm(parameters = subs)", globals(), locals(), filename="workStats.stat")

        p = pstats.Stats('workStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    @attr('integration')
    def testHugeTest(self):
        """
        Time one algorithm() call of the JobCreatorPoller over 10
        subscriptions of 5000 files each and check the job and acquired-file
        counts.

        Not meant for regular runs.
        """
        dbi = threading.currentThread().dbi

        config = self.getConfig()

        name = makeUUID()
        nSubs = 10
        nFiles = 5000
        expectedJobs = nSubs * nFiles

        dummyWorkload = self.createWorkload(workloadName='Tier1ReReco')
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')
        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        poller = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        started = time.time()
        poller.algorithm()
        elapsed = time.time() - started

        createdJobs = self.daoFactory(classname="Jobs.GetAllJobs").execute(state='Created',
                                                                           jobType="Processing")
        self.assertEqual(len(createdJobs), expectedJobs)

        print("Job took %f seconds to run" % elapsed)

        # Count database objects
        acquired = dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(acquired), expectedJobs)

    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Seed WMBS with the fixtures used by the job creation tests: one
        location, the "mergeFileset" and "bogusFileset" filesets, a workflow
        for the /TestWorkload/ReReco task, a ParentlessMergeBySize
        subscription on the merge fileset and eleven files.  Every file is
        added to both filesets before they are committed.

        workflowURL is used as the workflow spec, name as the workflow name.
        """
        pnn = "somese.cern.ch"
        self.daoFactory(classname="Locations.New").execute(siteName="s1", pnn=pnn)

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                                 name=name, task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        subscriptionArgs = {"workflow": mergeWorkflow,
                            "split_algo": "ParentlessMergeBySize"}
        mergeSubscription = Subscription(fileset=mergeFileset, **subscriptionArgs)
        mergeSubscription.create()
        # NOTE: this subscription object is built but create() is never called
        # on it in this method.
        dummySubscription = Subscription(fileset=bogusFileset, **subscriptionArgs)

        # (run number, lumi, LFNs); first_event advances by 1024 per file
        # within each group.
        fileGroups = [
            (1, 45, ["file1", "file2", "file3", "file4"]),
            (1, 46, ["fileA", "fileB", "fileC"]),
            (2, 46, ["fileI", "fileII", "fileIII", "fileIV"]),
        ]
        # Files whose size differs from the default of 1024
        specialSizes = {"fileIV": 1024 * 1000000}

        allFiles = []
        for runNumber, lumi, lfns in fileGroups:
            for position, lfn in enumerate(lfns):
                newFile = File(lfn=lfn, size=specialSizes.get(lfn, 1024),
                               events=1024, first_event=1024 * position,
                               locations={pnn})
                newFile.addRun(Run(runNumber, lumi))
                newFile.create()
                allFiles.append(newFile)

        for fileObj in allFiles:
            mergeFileset.addFile(fileObj)
            bogusFileset.addFile(fileObj)

        mergeFileset.commit()
        bogusFileset.commit()

        return

    def testTestNonProxySplitting(self):
        """
        _TestNonProxySplitting_

        Run the JobCreator poller once, without a proxy, over the fixtures
        inserted by stuffWMBS and check the outcome: exactly one 'Processing'
        job in state 'Created' and no 'Merge' jobs.
        """
        config = self.getConfig()
        config.JobCreator.workerThreads = 1

        name = makeUUID()

        # The returned workload object is not used by this test
        self.createWorkload(workloadName='TestWorkload')

        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload',
                                    'WMSandbox', 'WMWorkload.pkl')
        self.stuffWMBS(workflowURL=workloadPath, name=name)

        JobCreatorPoller(config=config).algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        for jobType, expectedJobs in (("Processing", 1), ("Merge", 0)):
            createdJobs = getJobsAction.execute(state='Created', jobType=jobType)
            self.assertEqual(len(createdJobs), expectedJobs)

        return
Beispiel #27
0
    def setUp(self):
        """
        setup for test.

        Connects to the database, installs the schema and the couch
        databases, registers the test sites and their 'Processing' thresholds
        in resource control, inserts a user and registers two heartbeat
        components.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        schemaModules = ["WMCore.WMBS", "WMCore.BossAir",
                         "WMCore.ResourceControl", "WMCore.Agent.Database"]
        self.testInit.setSchema(customModules = schemaModules, useDefault = False)
        for couchName, couchApp in (("bossair_t/jobs", "JobDump"),
                                    ("bossair_t/fwjrs", "FWJRDump")):
            self.testInit.setupCouch(couchName, couchApp)

        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.getJobs = self.daoFactory(classname = "Jobs.GetAllJobs")

        # Neither of these two DAOs is used further in this method
        locationAction = self.daoFactory(classname = "Locations.New")
        locationSlots = self.daoFactory(classname = "Locations.SetJobSlots")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName = site, seName = 'se.%s' % (site),
                                       ceName = site, plugin = "CondorPlugin",
                                       jobSlots = 1000)
            resourceControl.insertThreshold(siteName = site, taskType = 'Processing',
                                            maxSlots = 1000)

        # Additional sites: (site and CE name, SE name, plugin, max slots)
        extraSites = [
            ('Xanadu', 'se.Xanadu', "TestPlugin", 10000),
            ('jade-cms.hip.fi', 'madhatter.csc.fi', "ARCPlugin", 100),
            # using this for glite submissions
            ('grid-ce-01.ba.infn.it', 'storm-se-01.ba.infn.it', 'gLitePlugin', 50),
        ]
        for extraName, extraSE, extraPlugin, extraSlots in extraSites:
            resourceControl.insertSite(siteName = extraName, seName = extraSE,
                                       ceName = extraName, plugin = extraPlugin)
            resourceControl.insertThreshold(siteName = extraName,
                                            taskType = 'Processing',
                                            maxSlots = extraSlots)

        # Create user
        self.daoFactory(classname = "Users.New").execute(dn = "mnorman",
                                                         group_name = "phgroup",
                                                         role_name = "cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        self.heartbeatAPI = HeartbeatAPI('test')
        self.heartbeatAPI.registerComponent()
        self.heartbeatAPI2 = HeartbeatAPI('JobTracker')
        self.heartbeatAPI2.registerComponent()

        return
Beispiel #28
0
class Harness(object):
    """
    Harness class that wraps standard functionality used in all daemon
    components
    """
    def __init__(self, config, compName=None):
        """
        init

        The constructor is empty as we have an initalization method
        that can be called inside new threads (we use thread local attributes
        at startup.

        Default intialization of the harness including setting some diagnostic
        messages
        """
        self.config = config

        # component name is always the class name of child class
        if not compName:
            compName = self.__class__.__name__

        if compName not in (self.config.listComponents_() +
                            self.config.listWebapps_()):
            raise WMException(WMEXCEPTION['WMCORE-8'] + compName, 'WMCORE-8')
        if not hasattr(self.config, "Agent"):
            self.config.section_("Agent")

        self.config.Agent.componentName = compName
        compSect = getattr(self.config, compName, None)
        if compSect is None:
            # Then we have a major problem - there's no section with this name
            logging.error("Could not find section %s in config", compName)
            logging.error(
                "We are returning, and hoping you know what you're doing!")
            logging.debug("Config: %s", self.config)
            return
        # check if componentDir is set if not assign.
        if getattr(compSect, 'componentDir', None) is None:
            if not hasattr(self.config, "General"):
                # Don't do anything.  Assume the user knows what they are doing.
                logging.error(
                    "Missing componentDir and General section in config")
                logging.error("Going to trust you to know what you're doing.")
                return

            compSect.componentDir = os.path.join(
                self.config.General.workDir, 'Components',
                self.config.Agent.componentName)
        # we have name and location of the log files. Now make sure there
        # is a directory.
        try:
            if not os.path.isdir(compSect.componentDir):
                os.makedirs(compSect.componentDir)
        except Exception as ex:
            logging.error(
                "Encountered exception while making componentDirs: %s",
                str(ex))
            logging.error("Ignoring")

        self.threadManagerName = ''
        self.heartbeatAPI = None
        self.messages = {}
        self.logMsg = {}

        return

    def initInThread(self):
        """
        Default initialization of the harness including setting some
        diagnostic messages. This method is called when we call
        'prepareToStart'.

        Sets up file logging for the component, then attaches the logger,
        the config, a DBFactory, the database interface (plus a Transaction
        when an engine is available) and a WorkerThreadManager to the current
        thread object.

        Raises HarnessException when neither a logFile nor a componentDir is
        configured for the component.  Any exception is logged together with
        its traceback and re-raised.
        """
        try:
            self.messages = {}

            compName = self.config.Agent.componentName
            compSect = getattr(self.config, compName, None)
            # Without an explicit logFile, fall back to
            # <componentDir>/ComponentLog; componentDir is then mandatory.
            if not hasattr(compSect, "logFile"):
                if not getattr(compSect, 'componentDir', None):
                    errorMessage = "No componentDir for log entries found!\n"
                    errorMessage += "Harness cannot run without componentDir.\n"
                    logging.error(errorMessage)
                    raise HarnessException(errorMessage)
                compSect.logFile = os.path.join(compSect.componentDir,
                                                "ComponentLog")
            print('Log file is: ' + compSect.logFile)
            # Positional arguments: mode "a", maxBytes 1000000000, backupCount 3
            logHandler = RotatingFileHandler(compSect.logFile, "a", 1000000000,
                                             3)
            logMsgFormat = getattr(
                compSect, "logMsgFormat",
                "%(asctime)s:%(thread)d:%(levelname)s:%(module)s:%(message)s")
            logFormatter = \
                logging.Formatter(logMsgFormat)
            logHandler.setFormatter(logFormatter)
            logLevelName = getattr(compSect, 'logLevel', 'INFO')
            logLevel = getattr(logging, logLevelName)
            # NOTE(review): a new handler is added to the root logger on every
            # call of this method.
            logging.getLogger().addHandler(logHandler)
            logging.getLogger().setLevel(logLevel)
            # NOTE(review): logging.SQLDEBUG is not a standard logging level;
            # presumably it is registered by WMLogging - TODO confirm.
            self.logMsg = {
                'DEBUG': logging.DEBUG,
                'ERROR': logging.ERROR,
                'NOTSET': logging.NOTSET,
                'CRITICAL': logging.CRITICAL,
                'WARNING': logging.WARNING,
                'INFO': logging.INFO,
                'SQLDEBUG': logging.SQLDEBUG
            }
            # Sets the level a second time when the configured logLevel is one
            # of the names in self.logMsg.
            if hasattr(compSect,
                       "logLevel") and compSect.logLevel in self.logMsg:
                logging.getLogger().setLevel(self.logMsg[compSect.logLevel])
            WMLogging.sqldebug("wmcore level debug:")

            # If not previously set, force wmcore cache to current path
            if not os.environ.get('WMCORE_CACHE_DIR'):
                os.environ['WMCORE_CACHE_DIR'] = os.path.join(
                    compSect.componentDir, '.wmcore_cache')

            logging.info(">>>Starting: " + compName + '<<<')
            # check which backend to use: MySQL, Oracle, etc... for core
            # services.
            # we recognize there can be more than one database,
            # but we offer a default database that is used for core services.
            logging.info(">>>Initializing default database")
            logging.info(">>>Check if connection is through socket")
            myThread = threading.currentThread()
            myThread.logger = logging.getLogger()
            logging.info(">>>Setting config for thread: ")
            myThread.config = self.config

            logging.info(">>>Building database connection string")
            # check if there is a premade string; if not, build it yourself.
            dbConfig = ConfigDBMap(self.config)
            dbStr = dbConfig.getDBUrl()
            options = dbConfig.getOption()
            # we only want one DBFactory per database so we will need
            # to pass this on in case we are using threads.
            myThread.dbFactory = DBFactory(myThread.logger, dbStr, options)

            myThread.sql_transaction = True
            if myThread.dbFactory.engine:

                myThread.dbi = myThread.dbFactory.connect()
                myThread.transaction = Transaction(myThread.dbi)

            else:

                # No engine: dbi holds the configured connect URL and SQL
                # transactions are switched off for this thread.
                myThread.dbi = myThread.config.CoreDatabase.connectUrl
                myThread.sql_transaction = False

            # Attach a worker manager object to the main thread
            if not hasattr(myThread, 'workerThreadManager'):
                myThread.workerThreadManager = WorkerThreadManager(self)
            else:
                myThread.workerThreadManager.terminateSlaves.clear()
            # Workers stay paused here; prepareToStart() resumes them.
            myThread.workerThreadManager.pauseWorkers()

            logging.info(">>>Initialize transaction dictionary")

            # The part of the connection string before the first ':' names the
            # dialect; a string without ':' makes this unpacking fail.
            (connectDialect, dummy) = dbStr.split(":", 1)

            # myThread.dialect is only set for mysql and oracle.
            if connectDialect.lower() == 'mysql':
                myThread.dialect = 'MySQL'
            elif connectDialect.lower() == 'oracle':
                myThread.dialect = 'Oracle'

            logging.info("Harness part constructor finished")
        except Exception as ex:
            logging.critical("Problem instantiating " + str(ex))
            logging.error("Traceback: %s", str(traceback.format_exc()))
            raise

    def preInitialization(self):
        """
        _preInitialization_

        returns: nothing

        method that can be overloaded and will be called before the
        start component is called. (enables you to set message->handler
        mappings). You use the self.message dictionary of the base class
        to define the mappings.

        """
        pass

    def postInitialization(self):
        """
        _postInitialization_

        returns: nothing

        method that can be overloaded and will be called after the start
        component does the standard initialization, but before the wait
        (enables you to publish events when starting up)

        Define actions you want to execute before the actual message
        handling starts. E.g.: publishing some messages, or removing
        messages.

        """
        pass

    def logState(self):
        """
        _logState_

        returns: string

        method that can be overloaded to log additional state information
        (should return atring)
        """
        msg = 'No additional state information for ' + \
              self.config.Agent.componentName
        return msg

    def publishItem(self, items):
        """
        _publishItem_

        returns: nothing

        A method that publishes a (dictionary) set or 1 item
        to a monitoring service.
        """
        # FIXME: do we need this method. If so we need to agree
        # FIXME: on some default monitoring publication mechanism.
        pass

    def __call__(self, event, payload):
        """
        Once upon a time this was for doing the handling of diagnostic messages

        With the test-deprecating of the MsgService based diagnostics, we've basically
        scratched this.

        I'm leaving this in so at least the framework is still there

        -mnorman
        """
        return

    def initialization(self):
        """
        _initialization__

        Used the handle initializing the MsgService.  The MsgService
        is no longer used.

        Removed but not deleted, since all sorts of things call it
        """
        return

    def prepareToStart(self):
        """
        _prepareToStart_

        returns: Nothing

        Starts the initialization procedure. It is mainly an aggregation method
        so it can easily used in tests.
        """
        self.state = 'initialize'
        self.initInThread()
        # note: every component gets a (unique) name:
        # self.config.Agent.componentName
        logging.info(">>>Registering Component - %s",
                     self.config.Agent.componentName)

        if getattr(self.config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(self.config.Agent.componentName)
            self.heartbeatAPI.registerComponent()

        logging.info('>>>Starting initialization')

        logging.info('>>>Setting default transaction')
        myThread = threading.currentThread()

        self.preInitialization()

        if myThread.sql_transaction:
            myThread.transaction.begin()

        self.initialization()
        self.postInitialization()

        if myThread.sql_transaction:
            myThread.transaction.commit()

        logging.info('>>>Committing default transaction')

        logging.info(">>>Starting worker threads")
        myThread.workerThreadManager.resumeWorkers()

        logging.info(">>>Initialization finished!\n")
        # wait for messages
        self.state = 'active'

    def prepareToStop(self, wait=False, stopPayload=""):
        """
        _stopComponent

        Stops the component, including all worker threads. Allows call from
        test framework
        """
        # Stop all worker threads
        logging.info(">>>Terminating worker threads")
        myThread = threading.currentThread()
        try:
            myThread.workerThreadManager.terminateWorkers()
        except Exception:
            # We may not have a thread manager
            pass

        if wait:
            logging.info(
                ">>>Shut down of component while waiting for threads to finish"
            )
            # check if nr of threads is specified.
            activeThreads = 1
            if stopPayload != "":
                activeThreads = int(stopPayload)
                if activeThreads < 1:
                    activeThreads = 1
            while threading.activeCount() > activeThreads:
                logging.info('>>>Currently ' + str(threading.activeCount()) +
                             ' threads active')
                logging.info('>>>Waiting for less than ' + str(activeThreads) +
                             ' to be active')
                time.sleep(5)

    def handleMessage(self, type='', payload=''):
        """
        __handleMessage_

        Formerly used to handle messages - now non-functional
        Left here in case someone else is using it (i.e. PilotManager)
        """
        return

    def startDaemon(self, keepParent=False, compName=None):
        """
        Same result as start component, except that the comopnent
        is started as a daemon, after which you can close your xterm
        and the process will still run.

        The keepParent option enables us to keep the parent process
        which is used during testing,
        """
        msg = "Starting %s as a daemon " % self.config.Agent.componentName
        print(msg)
        if not compName:
            compName = self.__class__.__name__
        compSect = getattr(self.config, compName, None)
        msg = "Log will be in %s " % compSect.componentDir
        print(msg)
        # put the daemon config file in the work dir of this component.
        # FIXME: this file will be replaced by a database table.
        compSect = getattr(self.config, self.config.Agent.componentName, None)
        pid = createDaemon(compSect.componentDir, keepParent)
        # if this is not the parent start the component
        if pid == 0:
            self.startComponent()
            # if this is the parent return control to the testing environment.

    def startComponent(self):
        """
        _startComponent_

        returns: Nothing

        Start up the component, performs initialization and waits indefinitely
        Calling this method results in the application
        running in the xterm (not in daemon mode)

        """
        myThread = threading.currentThread()
        try:
            msg = 'None'
            self.prepareToStart()
            while True:
                time.sleep(360)

        except Exception as ex:
            if self.state == 'initialize':
                errormsg = """PostMortem: choked when initializing with error: %s\n""" % (
                    str(ex))
                stackTrace = traceback.format_tb(sys.exc_info()[2], None)
                for stackFrame in stackTrace:
                    errormsg += stackFrame
            else:
                errormsg = ""
                stackTrace = traceback.format_tb(sys.exc_info()[2], None)
                for stackFrame in stackTrace:
                    errormsg += stackFrame
                logging.error(errormsg)
                logging.error(
                    ">>>Fatal Error, Preparing to Rollback Transaction")
                if getattr(myThread, 'transaction', None) is not None:
                    myThread.transaction.rollback()
                self.prepareToStop(False)
                errormsg = """
PostMortem: choked while handling messages  with error: %s
while trying to handle msg: %s
                """ % (str(ex), str(msg))
            print(errormsg)
            logging.critical(errormsg)
            raise
        logging.info("System shutdown complete!")
        # this is to ensure exiting when in daemon mode.
        sys.exit()

    def __str__(self):
        """

        return: string

        String representation of the status of this component.
        """

        msg = 'Status of this component : \n'
        msg += '\n'
        msg += '>>Event Subscriptions --> Handlers<<\n'
        msg += '------------------------------------\n'
        for message in self.messages:
            msg += message + '-->' + str(self.messages[message]) + '\n'
        msg += '\n'
        msg += '\n'
        msg += '>>Parameters --> Values<<\n'
        msg += '-------------------------\n'
        msg += str(self.config)
        additionalMsg = self.logState()
        if additionalMsg != '':
            msg += '\n'
            msg += 'Additional state information\n'
            msg += '----------------------------\n'
            msg += '\n'
            msg += str(additionalMsg)
            msg += '\n'
        return msg
Beispiel #29
0
    def __init__(self, slaveClassName, totalSlaves, componentDir,
                 config, namespace = 'WMComponent', inPort = '5555',
                 outPort = '5558'):
        """
        __init__

        Constructor for the process pool.  The slave class name must be based
        inside the given namespace (WMComponent by default).  For example, the
        JobAccountant would pass in 'JobAccountant.AccountantWorker' to run
        the AccountantWorker class.

        The config is pickled to '<slaveClassName>_config.pkl' inside
        componentDir (an existing file is reported and overwritten).  A ZMQ
        PUSH socket is bound to inPort and a PULL socket to outPort; binding
        is retried once after a one second pause.  Finally the slaves are
        created.

        Raises ProcessPoolException when the sockets cannot be bound on the
        second attempt either.
        """
        self.enqueueIndex = 0
        self.dequeueIndex = 0
        self.runningWork  = 0

        #Use the Services.Requests JSONizer, which handles __to_json__ calls
        self.jsonHandler = JSONRequests()

        # heartbeat should be registered at this point
        if getattr(config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))

        self.slaveClassName = slaveClassName
        self.componentDir   = componentDir
        self.config         = config

        # Grab the python version from the running interpreter.
        # Naming convention is pythonA.B, i.e., python2.4 for v2.4.X.  A minor
        # version of 0 is a valid value and must not trigger a fallback.
        self.versionString = "python%i.%i" % (sys.version_info[0],
                                              sys.version_info[1])

        self.workers   = []
        self.nSlaves   = totalSlaves
        self.namespace = namespace
        self.inPort    = inPort
        self.outPort   = outPort

        # Pickle the config
        self.configPath = os.path.join(componentDir, '%s_config.pkl' % slaveClassName)
        if os.path.exists(self.configPath):
            # Then we note it and overwrite it
            msg =  "Something's in the way of the ProcessPool config: %s" % self.configPath
            logging.error(msg)
        # The context manager closes the file even when pickling fails
        with open(self.configPath, 'w') as configFile:
            cPickle.dump(config, configFile)

        # Set up ZMQ
        try:
            self.sender, self.sink = self._bindSockets(inPort, outPort)
        except zmq.ZMQError:
            # Try this again in a moment to see
            # if it's just being held by something pre-existing
            import time
            logging.error("Blocked socket on startup: Attempting sleep to give it time to clear.")
            time.sleep(1)
            try:
                self.sender, self.sink = self._bindSockets(inPort, outPort)
            except Exception as ex:
                msg =  "Error attempting to open TCP sockets\n"
                msg += str(ex)
                logging.error(msg)
                import traceback
                # Single-argument call form works as statement and function
                print(traceback.format_exc())
                raise ProcessPoolException(msg)

        # Now actually create the slaves
        self.createSlaves()

        return

    @staticmethod
    def _bindSockets(inPort, outPort):
        """
        Bind a PUSH socket to inPort and a PULL socket to outPort and return
        them as (sender, sink).

        Sockets opened by a failed attempt are closed before the error is
        re-raised, so that they do not keep a port occupied for the retry.
        """
        context = zmq.Context()
        opened = []
        try:
            sender = context.socket(zmq.PUSH)
            opened.append(sender)
            sender.bind("tcp://*:%s" % inPort)
            sink = context.socket(zmq.PULL)
            opened.append(sink)
            sink.bind("tcp://*:%s" % outPort)
        except Exception:
            for sock in opened:
                sock.close()
            raise
        return sender, sink
Beispiel #30
0
class BossAirTest(unittest.TestCase):
    """
    Tests for the BossAir prototype

    """

    sites = ['T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN', 'T2_US_Florida']

    def setUp(self):
        """
        _setUp_

        Prepare database, couch and resource-control state for a test.

        Creates the WMBS / BossAir / ResourceControl / Agent schema and the
        two "bossair_t" couch databases, registers one resource-control site
        per entry of self.sites (CondorPlugin) plus three extra sites bound
        to the Test, ARC and gLite plugins, inserts a test user, generates a
        work directory and registers two heartbeat components.
        """

        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # tearDown() removes the work directory and the couch databases;
        # presumably called here to clear leftovers of an earlier run.
        self.tearDown()
        self.testInit.setSchema(customModules=["WMCore.WMBS", "WMCore.BossAir",
                                               "WMCore.ResourceControl",
                                               "WMCore.Agent.Database"],
                                useDefault=False)
        self.testInit.setupCouch("bossair_t/jobs", "JobDump")
        self.testInit.setupCouch("bossair_t/fwjrs", "FWJRDump")

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site, seName='se.%s' % (site), cmsName=site,
                                       ceName=site, plugin="CondorPlugin", pendingSlots=1000,
                                       runningSlots=2000)
            resourceControl.insertThreshold(siteName=site, taskType='Processing',
                                            maxSlots=1000, pendingSlots=1000)

        # NOTE(review): the three inserts below pass cmsName=site, i.e. the
        # value left behind by the loop above (the last entry of self.sites).
        # Kept unchanged; confirm whether that is intended.
        resourceControl.insertSite(siteName='Xanadu', seName='se.Xanadu', cmsName=site,
                                   ceName='Xanadu', plugin="TestPlugin")
        resourceControl.insertThreshold(siteName='Xanadu', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        resourceControl.insertSite(siteName='jade-cms.hip.fi', seName='madhatter.csc.fi', cmsName=site,
                                   ceName='jade-cms.hip.fi', plugin="ARCPlugin")
        resourceControl.insertThreshold(siteName='jade-cms.hip.fi', taskType='Processing',
                                        maxSlots=100, pendingSlots=100)
        # using this for glite submissions
        resourceControl.insertSite(siteName='grid-ce-01.ba.infn.it', seName='storm-se-01.ba.infn.it', cmsName=site,
                                   ceName='grid-ce-01.ba.infn.it', plugin='gLitePlugin')
        resourceControl.insertThreshold(siteName='grid-ce-01.ba.infn.it', taskType='Processing',
                                        maxSlots=50, pendingSlots=50)

        # Create user
        newuser = self.daoFactory(classname="Users.New")
        newuser.execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        # Keep the generated work directory: getConfig, createTestWorkload
        # and makeNJobs all build their paths from self.testDir.
        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        componentName = 'test'
        self.heartbeatAPI = HeartbeatAPI(componentName)
        self.heartbeatAPI.registerComponent()
        componentName = 'JobTracker'
        self.heartbeatAPI2 = HeartbeatAPI(componentName)
        self.heartbeatAPI2.registerComponent()

        return

    def tearDown(self):
        """
        _tearDown_

        Remove the work directory and tear down the couch databases.
        The relational schema is not cleared here.
        """
        init = self.testInit
        init.delWorkDir()
        init.tearDownCouch()



    def getConfig(self):
        """
        _getConfig_

        Assemble the configuration object shared by the BossAir tests.

        Starts from the TestInit configuration and fills in the Agent,
        CoreDatabase, BossAir, JobSubmitter, JobTracker, JobStateMachine and
        JobStatusLite settings, then returns it.
        """
        config = self.testInit.getConfiguration()

        # Agent identity
        config.section_("Agent")
        config.Agent.agentName = 'testAgent'
        config.Agent.componentName = 'test'
        config.Agent.useHeartbeat = False

        # Database connection, taken from the environment
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir itself
        config.section_("BossAir")
        config.BossAir.pluginNames = ['TestPlugin', 'CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh'

        # JobSubmitter
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'INFO'
        config.JobSubmitter.pollInterval = 1
        config.JobSubmitter.pluginName = 'AirPlugin'
        config.JobSubmitter.pluginDir = 'JobSubmitter.Plugins'
        config.JobSubmitter.submitDir = os.path.join(self.testDir, 'submit')
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME", 'stevia.hep.wisc.edu')
        config.JobSubmitter.submitScript = os.path.join(
            WMCore.WMInit.getWMBASE(),
            'test/python/WMComponent_t/JobSubmitter_t',
            'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.gLiteConf = os.path.join(os.getcwd(), 'config.cfg')

        # JobTracker
        config.component_("JobTracker")
        config.JobTracker.logLevel = 'INFO'
        config.JobTracker.pollInterval = 1

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "bossair_t"

        # JobStatusLite
        config.component_('JobStatusLite')
        config.JobStatusLite.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobStatusLite.stateTimeouts = {'Pending': 10, 'Running': 86400}
        config.JobStatusLite.pollInterval = 1

        return config


    def createTestWorkload(self, workloadName = 'Test', emulator = True):
        """
        _createTestWorkload_

        Build the "Tier1ReReco" test workload, process it with a TaskMaker
        rooted at <testDir>/workloadTest (subscriptions skipped), save it
        under workloadName and return it.

        The emulator argument is accepted but not used.
        """
        spec = testWorkload("Tier1ReReco")
        # The looked-up task is not used afterwards; the call itself is kept.
        spec.getTask("ReReco")

        workDir = os.path.join(self.testDir, 'workloadTest')
        maker = TaskMaker(spec, workDir)
        maker.skipSubscription = True
        maker.processWorkload()

        spec.save(workloadName)
        return spec



    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site = None, bl = None, wl = None):
        """
        _createJobGroups_

        Create a series of job groups for submission.

        One workflow is created; for each of the nSubs rounds a new fileset,
        subscription and job group are created and filled with nJobs jobs by
        makeNJobs.

        Arguments:
          nSubs        -- number of fileset/subscription/job-group triples
          nJobs        -- number of jobs created per job group
          task         -- task object handed on to makeNJobs
          workloadSpec -- value stored as the workflow's spec
          site         -- optional location handed on to makeNJobs
          bl, wl       -- site blacklist / whitelist handed on to makeNJobs;
                          a fresh empty list is used when omitted

        Returns the list of committed job groups.
        """
        # A literal [] default is created once and shared by every call (and
        # by every job it gets stored on), so build the lists per call.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        jobGroupList = []

        testWorkflow = Workflow(spec = workloadSpec, owner = "tapas",
                                name = makeUUID(), task="basicWorkload/Production",
                                owner_vogroup = 'phgroup', owner_vorole = 'cmsrole')
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name = name)
            testFileset.create()
            testSubscription = Subscription(fileset = testFileset,
                                            workflow = testWorkflow,
                                            type = "Processing",
                                            split_algo = "FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription = testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name = name, task = task,
                           nJobs = nJobs,
                           jobGroup = testJobGroup,
                           fileset = testFileset,
                           sub = testSubscription.exists(),
                           site = site, bl = bl, wl = wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList



    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site = None, bl = None, wl = None):
        """
        _makeNJobs_

        Create nJobs files in the fileset and one job per file of the fileset.

        Each file gets the given site as location, or 'se.<site>' for every
        entry of self.sites when no site is given. Each job is created in
        jobGroup, gets a cache directory
        <testDir>/CacheDir/Sub_<sub>/Job_<index> and is pickled there as
        job.pkl.

        Arguments:
          name     -- prefix for the file LFNs and the job names
          task     -- task providing the path name and sandbox of the jobs
          nJobs    -- number of files to add to the fileset
          jobGroup -- job group the jobs are created in and added to
          fileset  -- fileset receiving the files
          sub      -- integer used in the name of the cache directory
          site     -- optional single location for all files
          bl, wl   -- site blacklist / whitelist stored on every job;
                      a fresh empty list is used when omitted

        Returns the last job and last file created; either is None when the
        corresponding loop did not run.
        """
        # A literal [] default would be one object shared by all calls and
        # stored on every job; build the lists per call instead.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        # Pre-set so the return below is valid even if a loop body never runs
        testJob = None
        testFile = None

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn = "/singleLfn/%s/%s" % (name, n),
                            size = 1024, events = 10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation('se.%s' % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        # Job indices start at 1
        for index, f in enumerate(fileset.files, 1):
            testJob = Job(name = '%s-%i' % (name, index))
            testJob.addFile(f)
            location = f.getLocations()[0]
            testJob["location"] = location
            testJob['custom']['location'] = location
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['owner'] = 'tapas'
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['ownerDN'] = 'tapas'
            testJob['ownerRole'] = 'cmsrole'
            testJob['ownerGroup'] = 'phgroup'

            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Binary mode for pickle data; the context manager closes the
            # handle even if pickling fails.
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile



    def createDummyJobs(self, nJobs, location = None):
        """
        _createDummyJobs_

        Create nJobs minimal jobs inside a freshly created workflow, fileset,
        subscription and job group, all named after one new UUID.

        The jobs are placed at the given location, or at self.sites[0] when
        no location is given. Returns the list of created jobs.
        """
        location = location or self.sites[0]

        uuid = makeUUID()

        workflow = Workflow(spec=uuid, owner="tapas",
                            name=uuid, task="basicWorkload/Production",
                            owner_vogroup='phgroup', owner_vorole='cmsrole')
        workflow.create()

        fileset = Fileset(name=uuid)
        fileset.create()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    type="Processing",
                                    split_algo="FileBased")
        subscription.create()

        group = JobGroup(subscription=subscription)
        group.create()

        # Ownership fields stamped on every dummy job, in assignment order
        ownership = (('userdn', 'tapas'),
                     ('owner', 'tapas'),
                     ('userrole', 'cmsrole'),
                     ('usergroup', 'phgroup'))

        jobList = []
        for number in range(nJobs):
            job = Job(name='%s-%i' % (uuid, number))
            job['location'] = location
            job['custom']['location'] = location
            for key, value in ownership:
                job[key] = value

            job.create(group)
            jobList.append(job)

        return jobList


    @attr('integration')
    def testA_APITest(self):
        """
        _APITest_

        Commissioning test: little more than loading the code and walking
        dummy jobs through the BossAirAPI bookkeeping calls (create, list,
        load by status, update, load by ID / WMBS, delete).
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        baAPI = BossAirAPI(config = config)

        # We should have loaded a plugin
        self.assertTrue('TestPlugin' in baAPI.plugins.keys())

        result = myThread.dbi.processData("SELECT name FROM bl_status")[0].fetchall()
        statusList = [row.values()[0] for row in result]

        # We should have the plugin states in the database.
        # The previous check compared the results of two list.sort() calls,
        # which are both None, so it could never fail. Membership is checked
        # rather than equality because getConfig loads more than one plugin,
        # which presumably contribute further states (TODO confirm).
        for status in ['New', 'Dead', 'Gone']:
            self.assertTrue(status in statusList)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs = nJobs)
        print(jobDummies)

        baAPI.createNewJobs(wmbsJobs = jobDummies)

        runningJobs = baAPI._listRunJobs()

        self.assertEqual(len(runningJobs), nJobs)

        newJobs = baAPI._loadByStatus(status = 'New')
        self.assertEqual(len(newJobs), nJobs)
        deadJobs = baAPI._loadByStatus(status = 'Dead')
        self.assertEqual(len(deadJobs), 0)

        # An unknown status must be rejected
        self.assertRaises(BossAirException,
                          baAPI._loadByStatus, status = 'FalseStatus')

        # Change the job status and update it
        for job in newJobs:
            job['status'] = 'Dead'

        baAPI._updateJobs(jobs = newJobs)

        # Test whether we see the job status as updated
        newJobs = baAPI._loadByStatus(status = 'New')
        self.assertEqual(len(newJobs), 0)
        deadJobs = baAPI._loadByStatus(status = 'Dead')
        self.assertEqual(len(deadJobs), nJobs)

        # Can we load by BossAir ID?
        loadedJobs = baAPI._loadByID(jobs = deadJobs)
        self.assertEqual(len(loadedJobs), nJobs)

        # Can we load via WMBS?
        loadedJobs = baAPI.loadByWMBS(wmbsJobs = jobDummies)
        self.assertEqual(len(loadedJobs), nJobs)

        # See if we can delete jobs
        baAPI._deleteJobs(jobs = deadJobs)

        # Confirm that they're gone
        deadJobs = baAPI._loadByStatus(status = 'Dead')
        self.assertEqual(len(deadJobs), 0)

        self.assertEqual(len(baAPI.jobs), 0)

        return


    @attr('integration')
    def testB_PluginTest(self):
        """
        _PluginTest_

        Check the same bookkeeping when it is driven through a plugin
        instead of calling the API internals directly: dummy jobs located at
        'Xanadu' are submitted with the TestPlugin, tracked, reported as
        complete and finally removed.
        """
        runJobQuery = "SELECT id FROM bl_runjob"
        expected = 10

        myThread = threading.currentThread()
        config = self.getConfig()
        baAPI = BossAirAPI(config=config)

        # Create the jobs and move them to 'executing'
        jobDummies = self.createDummyJobs(nJobs=expected, location='Xanadu')
        stateChanger = ChangeState(config)
        stateChanger.propagate(jobDummies, 'created', 'new')
        stateChanger.propagate(jobDummies, 'executing', 'created')

        # Every job needs a plugin and an owner before it can be submitted
        for dummy in jobDummies:
            dummy['plugin'] = 'TestPlugin'
            dummy['owner'] = 'tapas'

        baAPI.submit(jobs=jobDummies)

        # Right after submission all jobs are 'New' ...
        self.assertEqual(len(baAPI._loadByStatus(status='New')), expected)

        # ... and all of them are still listed as running
        self.assertEqual(len(baAPI._listRunJobs()), expected)

        # Test Plugin should complete all jobs
        baAPI.track()

        # Should be no more running jobs
        self.assertEqual(len(baAPI._listRunJobs()), 0)

        # Check if they're complete
        self.assertEqual(len(baAPI.getComplete()), expected)

        # Query the table directly because BossAir is specifically built
        # to keep it from finding completed jobs
        rows = myThread.dbi.processData(runJobQuery)[0].fetchall()
        self.assertEqual(len(rows), expected)

        baAPI.removeComplete(jobs=jobDummies)

        rows = myThread.dbi.processData(runJobQuery)[0].fetchall()
        self.assertEqual(len(rows), 0)

        return

    def testG_monitoringDAO(self):
        """
        _monitoringDAO_

        Test for the monitoring DAO.

        Currently disabled: the method returns immediately, so none of the
        statements after the first return are executed and the test always
        passes.
        """

        # Early return: everything below is unreachable.
        return

        myThread = threading.currentThread()

        config = self.getConfig()

        changeState = ChangeState(config)

        baAPI  = BossAirAPI(config = config)


        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs = nJobs)

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job['plugin']   = 'TestPlugin'
            job['owner']    = 'tapas'
            job['location'] = 'T2_US_UCSD'
            job.save()

        baAPI.submit(jobs = jobDummies)


        results = baAPI.monitor()

        self.assertEqual(len(results), nJobs)
        # NOTE(review): jobs are submitted with plugin 'TestPlugin' above but
        # 'CondorPlugin' is expected here -- confirm before re-enabling.
        for job in results:
            self.assertEqual(job['plugin'], 'CondorPlugin')


        return
Beispiel #31
0
class DBSUploadTest(unittest.TestCase):
    """
    TestCase for DBSUpload module

    Note:
      This fails if you use the in-memory syntax for sqlite
      i.e. (DATABASE = sqlite://)
    """
    _maxMessage = 10


    def setUp(self):
        """
        _setUp_

        Prepare database, couch and a cached configuration for the test.

        Creates the DBS3Buffer / Agent schema and the config couch database,
        adds three buffer locations, registers a heartbeat component, writes
        self.configString to <testDir>/PSet.txt, stores that file in a
        ConfigCache and points self.configURL at the saved document.
        """
        # Set constants
        self.couchDB      = "config_test"
        # Placeholder; replaced by the real URL at the end of this method
        self.configURL    = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules =
                                ["WMComponent.DBS3Buffer",
                                 'WMCore.Agent.Database'],
                                useDefault = False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)

        locationAction = self.bufferFactory(classname = "DBSBufferFiles.AddLocation")
        locationAction.execute(siteName = "se1.cern.ch")
        locationAction.execute(siteName = "se1.fnal.gov")
        locationAction.execute(siteName = "malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI  = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDB)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        self.testDir = self.testInit.generateWorkDir()

        psetPath = os.path.join(self.testDir, "PSet.txt")
        # Context manager so the handle is closed even if the write fails
        with open(psetPath, 'w') as psetFile:
            psetFile.write(self.configString)

        configCache.addConfig(newConfig = psetPath, psetHash = None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"],
                                         self.couchDB,
                                         configCache.getCouchID())

        return

    def tearDown(self):
        """
        _tearDown_

        Undo what setUp created: clear the DBS3Buffer / Agent schema, remove
        the generated work directory and tear down the couch database.
        """

        self.testInit.clearDatabase(modules = ["WMComponent.DBS3Buffer",
                                               'WMCore.Agent.Database'])
        # setUp also generates a work directory (holding PSet.txt) and sets
        # up a couch database; release them so they do not pile up.
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()

    def createConfig(self):
        """
        _createConfig_

        Build and return the configuration used by the DBSUpload component:
        General, Agent, CoreDatabase, DBSUpload, DBSInterface and Alert
        settings.
        """
        config = Configuration()

        # General settings
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())

        config.section_("Agent")
        config.Agent.componentName = 'DBSUpload'
        config.Agent.useHeartbeat = False

        # CoreDatabase: dialect, dburl, etc., taken from the environment
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # The component itself
        config.component_("DBSUpload")
        config.DBSUpload.pollInterval = 10
        config.DBSUpload.logLevel = 'ERROR'
        config.DBSUpload.maxThreads = 1
        config.DBSUpload.namespace = 'WMComponent.DBSUpload.DBSUpload'
        config.DBSUpload.componentDir = os.path.join(os.getcwd(), 'Components')
        config.DBSUpload.workerThreads = 4

        # DBS endpoints and block limits
        config.section_("DBSInterface")
        config.DBSInterface.globalDBSUrl = 'http://vocms09.cern.ch:8880/cms_dbs_int_local_xx_writer/servlet/DBSServlet'
        config.DBSInterface.globalDBSVersion = 'DBS_2_0_9'
        config.DBSInterface.DBSUrl = 'http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet'
        config.DBSInterface.DBSVersion = 'DBS_2_0_9'
        config.DBSInterface.DBSBlockMaxFiles = 10
        config.DBSInterface.DBSBlockMaxSize = 9999999999
        config.DBSInterface.DBSBlockMaxTime = 10000
        config.DBSInterface.MaxFilesToCommit = 10

        # Alerts messaging framework: work (alerts) and control channel
        # addresses the component sends to, i.e. where
        # AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"
        # Threshold for the DBS upload queue size alert
        # (reference: trac ticket #1628)
        config.DBSUpload.alertUploadQueueSize = 2000

        return config


    def getFiles(self, name, tier, nFiles = 12, site = "malpaquet"):
        """
        _getFiles_

        Create nFiles dummy DBSBuffer files in dataset /<name>/<name>/<tier>
        located at site, plus one child file in /<name>/<name>_2/RECO whose
        parents are all of those files.

        Returns the list of parent files; the child file is created but not
        returned.
        """
        # Algorithm settings shared by the parent files and the child file
        algo = dict(appName=name, appVer="CMSSW_3_1_1",
                    appFam="RECO", psetHash="GIBBERISH",
                    configContent=self.configURL)

        files = []
        for number in range(nFiles):
            parent = DBSBufferFile(lfn='%s-%s-%i' % (name, site, number), size=1024,
                                   events=20, checksums={'cksum': 1})
            parent.setAlgorithm(**algo)
            parent.setDatasetPath("/%s/%s/%s" % (name, name, tier))
            parent.addRun(Run(1, number))
            parent.setGlobalTag("aGlobalTag")
            parent.create()
            parent.setLocation(site)
            files.append(parent)

        child = DBSBufferFile(lfn='%s-%s-child' % (name, site), size=1024,
                              events=10, checksums={'cksum': 1})
        child.setAlgorithm(**algo)
        child.setDatasetPath("/%s/%s_2/RECO" % (name, name))
        child.addRun(Run(1, 45))
        child.setGlobalTag("aGlobalTag")
        child.create()
        child.setLocation(site)

        parentLFNs = [parent['lfn'] for parent in files]
        child.addParents(parentLFNs)

        return files


    @attr('integration')
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Run the upload poller twice over one parent dataset of 12 files and
        check the buffer table, local DBS and global DBS after each round.

        Round one: one closed block of 10 files is expected in global DBS
        while the remaining 2 files sit in an open block. Round two, run
        after sleeping past the block timeout: both blocks are expected to
        be in global DBS. In both rounds listing the child dataset in local
        DBS must raise.
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSInterface.DBSBlockMaxTime = 3
        config.DBSUpload.pollInterval  = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)
        childPath = '/%s/%s_2/%s' % (name, name, tier)

        # Load components that are necessary to check status
        # (the loaded object itself is not used below)
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS',), ('Open',)])

        # Check to see if datasets and algos are in local DBS
        result = listAlgorithms(apiRef = localAPI, patternExe = name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['ExecutableName'], name)
        result = listPrimaryDatasets(apiRef = localAPI, match = name)
        self.assertEqual(result, [name])
        # Only called; its result is not checked
        listProcessedDatasets(apiRef = localAPI, primary = name, dataTier = "*")

        # Then check and see that the closed block made it into local DBS
        affectedBlocks = listBlocks(apiRef = localAPI, datasetPath = datasetPath)
        if affectedBlocks[0]['OpenForWriting'] == '0':
            self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
        else:
            self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef = localAPI, datasetPath = datasetPath)
        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there: the listing must raise
        self.assertRaises(Exception, listDatasetFiles,
                          apiRef = localAPI, datasetPath = childPath)

        # There should be one block in global
        # It should have ten files and be closed
        result = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 1)
        for block in result:
            self.assertEqual(block['OpenForWriting'], '0')
            self.assertTrue(block['NumberOfFiles'] in [2, 10])

        # Okay, deep breath.  First round done
        # In the second round, the second block of the parent fileset should transfer
        # Make sure that the timeout functions work
        time.sleep(10)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',)])

        # Check to make sure all the files are in global
        result = listDatasetFiles(apiRef = globeAPI, datasetPath = datasetPath)
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files still aren't there. The previous version
        # recorded the outcome in a flag here but never asserted on it.
        self.assertRaises(Exception, listDatasetFiles,
                          apiRef = localAPI, datasetPath = childPath)
Beispiel #32
0
class JobSubmitterTest(unittest.TestCase):
    """
    Test class for the JobSubmitter

    The enabled tests create WMBS job groups for a test workload, run
    JobSubmitterPoller.algorithm() on them and then check the resulting job
    states, the generated JDL file and the number of condor jobs that
    getCondorRunningJobs() reports for the current user.
    """

    # Site names inserted into ResourceControl by setUp(); each one is
    # registered with the storage element name "se.<site>".
    sites = ["T2_US_Florida", "T2_US_UCSD", "T2_TW_Taiwan", "T1_CH_CERN"]

    def setUp(self):
        """
        _setUp_

        Set up logging, the database schema and the two couch databases,
        register every entry of self.sites in ResourceControl, create the
        work directory and register the JobSubmitter heartbeat component.
        """
        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection(destroyAllDatabase=True)

        schemaModules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"]
        init.setSchema(customModules=schemaModules, useDefault=False)

        for couchName, couchApp in (("jobsubmitter_t/jobs", "JobDump"), ("jobsubmitter_t/fwjrs", "FWJRDump")):
            init.setupCouch(couchName, couchApp)

        currentThread = threading.currentThread()
        self.daoFactory = DAOFactory(
            package="WMCore.WMBS", logger=currentThread.logger, dbinterface=currentThread.dbi
        )

        # Both DAOs are instantiated here although nothing below uses them
        for daoName in ("Locations.New", "Locations.SetJobSlots"):
            self.daoFactory(classname=daoName)

        # The tests pass the user name to getCondorRunningJobs and condor_rm
        self.user = getpass.getuser()

        self.ceName = "127.0.0.1"

        # Register each site, plus a "Processing" threshold, in resourceControl
        siteDB = ResourceControl()
        for siteName in self.sites:
            siteDB.insertSite(
                siteName=siteName,
                seName="se.%s" % siteName,
                ceName=siteName,
                plugin="CondorPlugin",
                pendingSlots=10000,
                runningSlots=20000,
                cmsName=siteName,
            )
            siteDB.insertThreshold(siteName=siteName, taskType="Processing", maxSlots=10000)

        self.testDir = init.generateWorkDir()

        # Set heartbeat
        self.componentName = "JobSubmitter"
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

    def tearDown(self):
        """
        _tearDown_

        Clear the database modules installed by setUp, delete the work
        directory and tear down the couch databases.
        """
        installedModules = ["WMCore.WMBS", "WMCore.ResourceControl", "WMCore.BossAir", "WMCore.Agent.Database"]
        init = self.testInit
        init.clearDatabase(modules=installedModules)
        init.delWorkDir()
        init.tearDownCouch()

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=None, wl=None, type="Processing"):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions

        A single workflow is created for workloadSpec.  For each of the nSubs
        subscriptions a fileset, a subscription and a job group are created,
        and makeNJobs() fills the group with nJobs jobs.

        :param nSubs: number of subscriptions (one job group each) to create
        :param nJobs: number of jobs to create per job group
        :param task: task object handed on to makeNJobs()
        :param workloadSpec: value used as the workflow spec
        :param site: optional file location handed on to makeNJobs()
        :param bl: site blacklist stored in every job (default: empty list)
        :param wl: site whitelist stored in every job (default: empty list)
        :param type: subscription type
        :returns: list with the created JobGroup objects
        """
        # A literal [] default would be one list object shared by all calls
        # (and stored in every job); create a fresh one per call instead.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        jobGroupList = []

        testWorkflow = Workflow(spec=workloadSpec, owner="mnorman", name=makeUUID(), task="basicWorkload/Production")
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(
                fileset=testFileset, workflow=testWorkflow, type=type, split_algo="FileBased"
            )
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(
                name=name,
                task=task,
                nJobs=nJobs,
                jobGroup=testJobGroup,
                fileset=testFileset,
                sub=testSubscription.exists(),
                site=site,
                bl=bl,
                wl=wl,
            )

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site=None, bl=None, wl=None):
        """
        _makeNJobs_

        Create nJobs files in the fileset and one WMBS job per file of the
        fileset.  Every job is added to jobGroup, gets its own cache directory
        <testDir>/CacheDir/Sub_<sub>/Job_<index> and is pickled there as
        job.pkl.

        :param name: prefix for the file LFNs and the job names
        :param task: task providing getPathName() and data.input.sandbox
        :param nJobs: number of files to create
        :param jobGroup: job group the jobs are created in and added to
        :param fileset: fileset receiving the new files
        :param sub: integer used in the "Sub_<sub>" cache directory name
        :param site: location for every file; when not set, each file gets
                     "se.<site>" for every entry of self.sites
        :param bl: site blacklist stored in every job (default: empty list)
        :param wl: site whitelist stored in every job (default: empty list)
        :returns: tuple (last job created, last file created); an element is
                  None when no job / no file was created
        """
        # Fresh lists per call instead of a shared mutable default
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, "CacheDir")

        # Keep the return statement valid when a loop below never runs
        testJob = None
        testFile = None

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n), size=1024, events=10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation("se.%s" % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        # Job indexes start at 1
        for index, f in enumerate(fileset.files, 1):
            testJob = Job(name="%s-%i" % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob["task"] = task.getPathName()
            testJob["sandbox"] = task.data.input.sandbox
            testJob["spec"] = os.path.join(self.testDir, "basicWorkload.pcl")
            testJob["mask"]["FirstEvent"] = 101
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob["priority"] = 101
            jobCache = os.path.join(cacheDir, "Sub_%i" % (sub), "Job_%i" % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob["cache_dir"] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Pickle data is binary: open in "wb", and let the context
            # manager close the file even when pickle.dump raises.
            with open(os.path.join(jobCache, "job.pkl"), "wb") as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(
        self, configPath=os.path.join(WMCore.WMInit.getWMBASE(), "src/python/WMComponent/JobSubmitter/DefaultConfig.py")
    ):
        """
        _getConfig_

        Build the configuration used by the tests: the Agent, General,
        CoreDatabase, BossAir, JobSubmitter and JobStateMachine parts are
        filled in here, partly from the environment variables TESTDIR,
        DATABASE, DBSOCK, HOSTNAME and COUCHURL.

        The JobSubmitter component directory is created if it does not exist
        yet, so the method may be called repeatedly within one test.

        :param configPath: kept for compatibility; this method does not read it
        :returns: the populated Configuration object
        """

        config = Configuration()

        config.component_("Agent")
        config.Agent.WMSpecDirectory = self.testDir
        config.Agent.agentName = "testAgent"
        config.Agent.componentName = self.componentName
        config.Agent.useHeartbeat = False

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)

        # Now the CoreDatabase information
        # This should be the dialect, dburl, etc

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.section_("BossAir")
        config.BossAir.pluginNames = ["TestPlugin", "CondorPlugin"]
        config.BossAir.pluginDir = "WMCore.BossAir.Plugins"

        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = "INFO"
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.pluginName = "CondorGlobusPlugin"
        config.JobSubmitter.pluginDir = "JobSubmitter.Plugins"
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME", "badtest.fnal.gov")
        config.JobSubmitter.submitScript = os.path.join(
            WMCore.WMBase.getTestBase(), "WMComponent_t/JobSubmitter_t", "submit.sh"
        )
        config.JobSubmitter.componentDir = os.path.join(self.testDir, "Components")
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.inputFile = os.path.join(
            WMCore.WMBase.getTestBase(), "WMComponent_t/JobSubmitter_t", "FrameworkJobReport-4540.xml"
        )
        config.JobSubmitter.deleteJDLFiles = False

        # JobStateMachine
        config.component_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL")
        config.JobStateMachine.couchDBName = "jobsubmitter_t"

        # Needed, because this is a test.  os.makedirs raises when the
        # directory already exists, hence the check.
        if not os.path.isdir(config.JobSubmitter.componentDir):
            os.makedirs(config.JobSubmitter.componentDir)

        return config

    def createTestWorkload(self, workloadName="Test", emulator=True):
        """
        _createTestWorkload_

        Build the "Tier1ReReco" test workload and let a TaskMaker (with
        skipSubscription set) process it under <testDir>/workloadTest.

        Neither workloadName nor emulator is read by this method.

        :returns: the workload object
        """
        testSpec = testWorkload("Tier1ReReco")
        # The looked-up task is not used afterwards; the call itself is kept
        testSpec.getTask("ReReco")

        workloadDir = os.path.join(self.testDir, "workloadTest")
        maker = TaskMaker(testSpec, workloadDir)
        maker.skipSubscription = True
        maker.processWorkload()

        return testSpec

    def checkJDL(self, config, cacheDir, submitFile, site=None, indexFlag=False, noIndex=False):
        """
        _checkJDL_

        Check the basic JDL setup

        Parses <submitDir>/<submitFile> with parseJDL and asserts on every
        job entry (job id, arguments, priority and optionally the input files
        and the desired site) and on the shared head section.

        :param config: configuration providing JobSubmitter.submitDir and
                       JobSubmitter.submitScript
        :param cacheDir: not used by this method
        :param submitFile: name of the JDL file inside submitDir
        :param site: when set, the expected "+DESIRED_Sites" value
        :param indexFlag: when True, the expected job package batch is
                          <job id> - 1 instead of 1
        :param noIndex: when True, "transfer_input_files" is not checked
        """

        jdlPath = os.path.join(config.JobSubmitter.submitDir, submitFile)
        jobs, head = parseJDL(jdlLocation=jdlPath)

        # These two paths are the same for every job
        workloadDir = os.path.join(self.testDir, "workloadTest/TestWorkload")
        sandboxPath = os.path.join(workloadDir, "TestWorkload-Sandbox.tar.bz2")
        unpackerPath = os.path.join(WMCore.WMInit.getWMBASE(), "src/python/WMCore", "WMRuntime/Unpacker.py")

        batch = 1

        # Check each job entry in the JDL
        for job in jobs:
            # A missing "+WMAgent_JobID" shows up as index 0
            index = int(job.get("+WMAgent_JobID", 0))
            self.assertNotEqual(index, 0)

            if indexFlag:
                batch = index - 1

            packagePath = os.path.join(workloadDir, "PackageCollection_0/batch_%i-0/JobPackage.pkl" % (batch))
            inputFileString = "%s, %s, %s" % (sandboxPath, packagePath, unpackerPath)
            if not noIndex:
                self.assertEqual(job.get("transfer_input_files", None), inputFileString)
            # The second argument is expected to be the job id itself
            self.assertEqual(job.get("arguments", None), "TestWorkload-Sandbox.tar.bz2 %i" % (index))

            if site:
                self.assertEqual(job.get("+DESIRED_Sites", None), '"%s"' % site)

            # Check the priority
            self.assertEqual(job.get("priority", None), "101")

        # Now handle the head
        self.assertEqual(head.get("should_transfer_files", None), "YES")
        self.assertEqual(head.get("Log", None), "condor.$(Cluster).$(Process).log")
        self.assertEqual(head.get("Error", None), "condor.$(Cluster).$(Process).err")
        self.assertEqual(head.get("Output", None), "condor.$(Cluster).$(Process).out")
        self.assertEqual(head.get("when_to_transfer_output", None), "ON_EXIT")
        self.assertEqual(head.get("Executable", None), config.JobSubmitter.submitScript)

        return

    @attr("integration")
    def testA_BasicTest(self):
        """
        Use the CondorGlobusPlugin to create a very simple test
        Check to see that all the jobs were submitted
        Parse and test the JDL files
        See what condor says

        The condor jobs of the test user are removed with condor_rm even when
        one of the checks fails, so that a failure here does not trip the
        "no running jobs" precheck of the tests that follow.
        """
        workloadName = "basicWorkload"

        workload = self.createTestWorkload()

        config = self.getConfig()

        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site="se.T2_US_UCSD",
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Checked before the try block: when this fails nothing has been
        # submitted by the test yet, so no condor_rm is issued.
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)
        try:
            jobSubmitter.algorithm()

            # Check that jobs are in the right state
            result = getJobsAction.execute(state="Created", jobType="Processing")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs)

            # Check assigned locations
            getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
            for jobID in result:
                loc = getLocationAction.execute(jobid=jobID)
                self.assertEqual(loc, [["T2_US_UCSD"]])

            # Check on the JDL: use the last directory entry containing "submit"
            submitFile = None
            for fileName in os.listdir(config.JobSubmitter.submitDir):
                if re.search("submit", fileName):
                    submitFile = fileName
            self.assertTrue(submitFile is not None)
            self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD")

            # Check to make sure we have running jobs
            nRunning = getCondorRunningJobs(self.user)
            self.assertEqual(nRunning, nJobs * nSubs)

            # This should do nothing
            jobGroupList = self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
                site="se.T2_US_UCSD",
            )
            for group in jobGroupList:
                changeState.propagate(group.jobs, "created", "new")
            jobSubmitter.algorithm()
        finally:
            # Now clean-up, whether or not the checks above passed
            command = ["condor_rm", self.user]
            pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
            pipe.communicate()

        del jobSubmitter

        return

    @attr("performance")
    def testB_TimeLongSubmission(self):
        """
        _TimeLongSubmission_

        Submit a lot of jobs and test how long it takes for
        them to actually be submitted

        The test is currently switched off: it returns immediately and the
        statements after the first return never run.
        """

        return

        workloadName = "basicWorkload"
        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 300
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it under the profiler; stats go to testStats.stat
        startTime = time.time()
        cProfile.runctx("jobSubmitter.algorithm()", globals(), locals(), filename="testStats.stat")
        stopTime = time.time()

        if os.path.isdir("CacheDir"):
            shutil.rmtree("CacheDir")
        shutil.copytree("%s" % self.testDir, os.path.join(os.getcwd(), "CacheDir"))

        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs * nSubs)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        # Single parenthesised argument: valid as a Python 2 print statement
        # and as a Python 3 print() call, so the module still compiles on 3.
        print("Job took %f seconds to complete" % (stopTime - startTime))

        p = pstats.Stats("testStats.stat")
        p.sort_stats("cumulative")
        p.print_stats()

        return

    def testD_CreamCETest(self):
        """
        _CreamCETest_

        This is for submitting to Cream CEs.  Don't use it.

        The method returns before doing anything; the statements after the
        first return are never executed.
        """

        return

        runningJobs = getCondorRunningJobs(self.user)
        self.assertEqual(
            runningJobs, 0, "User currently has %i running jobs.  Test will not continue" % (runningJobs)
        )

        workloadName = "basicWorkload"
        myThread = threading.currentThread()
        workload = self.createTestWorkload()

        # Same configuration as the other tests, but with the Cream plugin
        config = self.getConfig()
        config.JobSubmitter.pluginName = "CreamPlugin"
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        # Add a new site, both through the WMBS DAOs and in ResourceControl
        siteName = "creamSite"
        ceName = "https://cream-1-fzk.gridka.de:8443/ce-cream/services/CREAM2  pbs cmsXS"
        newLocation = self.daoFactory(classname="Locations.New")
        setPendingSlots = self.daoFactory(classname="Locations.SetPendingSlots")
        newLocation.execute(siteName=siteName, seName=siteName, ceName=ceName)
        setPendingSlots.execute(siteName=siteName, pendingSlots=1000)

        siteDB = ResourceControl()
        siteDB.insertSite(siteName=siteName, seName=siteName, ceName=ceName)
        siteDB.insertThreshold(siteName=siteName, taskType="Processing", maxSlots=10000)

        specPath = os.path.join(self.testDir, "workloadTest", workloadName)
        jobGroupList = self.createJobGroups(
            nSubs=nSubs, nJobs=nJobs, task=workload.getTask("ReReco"), workloadSpec=specPath, site=siteName
        )
        for jobGroup in jobGroupList:
            changeState.propagate(jobGroup.jobs, "created", "new")

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Nothing may be left in Created; everything must be Executing
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        createdJobs = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(createdJobs), 0)
        executingJobs = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(executingJobs), nSubs * nJobs)

        # Now clean-up
        rmProcess = Popen(["condor_rm", self.user], stdout=PIPE, stderr=PIPE, shell=False)
        rmProcess.communicate()

        if os.path.exists("CacheDir"):
            shutil.rmtree("CacheDir")
        shutil.copytree(self.testDir, "CacheDir")

        return

    @attr("integration")
    def testE_WhiteListBlackList(self):
        """
        _WhiteListBlackList_

        Test the whitelist/blacklist implementation
        Trust the jobCreator to get this in the job right

        Four rounds of nSubs * nJobs jobs are submitted:
          1. every site except T2_US_UCSD blacklisted -> jobs run at UCSD
          2. whitelist of T2_US_UCSD only             -> jobs run at UCSD
          3. whitelist of an unknown site             -> jobs go to SubmitFailed
          4. every site blacklisted                   -> jobs go to SubmitFailed

        The condor jobs of the test user are removed after every round and,
        through the finally clause, also when a check fails part-way.
        """
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def createJobs(**siteLists):
            # Create nSubs job groups (bl=/wl= passed through) and move their
            # jobs to the "created" state
            groups = self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
                **siteLists
            )
            for group in groups:
                changeState.propagate(group.jobs, "created", "new")

        def assertNoRunningJobs():
            # Precheck used before each later round
            running = getCondorRunningJobs(self.user)
            self.assertEqual(running, 0, "User currently has %i running jobs.  Test will not continue" % (running))

        def removeCondorJobs():
            # Run "condor_rm <user>" and wait for it to finish
            pipe = Popen(["condor_rm", self.user], stdout=PIPE, stderr=PIPE, shell=False)
            pipe.communicate()

        def copyWorkDir():
            # Replace ./CacheDir with a copy of the test work directory
            if os.path.isdir("CacheDir"):
                shutil.rmtree("CacheDir")
            shutil.copytree("%s" % self.testDir, os.path.join(os.getcwd(), "CacheDir"))

        def findSubmitFile():
            # Last entry of submitDir whose name contains "submit", else None
            found = None
            for fileName in os.listdir(config.JobSubmitter.submitDir):
                if re.search("submit", fileName):
                    found = fileName
            return found

        createJobs(bl=["T2_US_Florida", "T2_TW_Taiwan", "T1_CH_CERN"])

        jobSubmitter = JobSubmitterPoller(config=config)

        try:
            # Round 1: actually run it
            jobSubmitter.algorithm()
            copyWorkDir()

            # Check to make sure we have running jobs
            nRunning = getCondorRunningJobs(self.user)
            self.assertEqual(nRunning, nJobs * nSubs)

            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs)

            # All jobs should be at UCSD
            submitFile = findSubmitFile()
            self.assertTrue(submitFile is not None)
            self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD")

            # Now clean-up
            removeCondorJobs()

            # Round 2: run again and test the whiteList
            createJobs(wl=["T2_US_UCSD"])
            assertNoRunningJobs()

            jobSubmitter = JobSubmitterPoller(config=config)
            jobSubmitter.algorithm()
            copyWorkDir()

            # Check to make sure we have running jobs
            nRunning = getCondorRunningJobs(self.user)
            self.assertEqual(nRunning, nJobs * nSubs)

            # You'll have jobs from the previous run still in the database
            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs * 2)

            # All jobs should be at UCSD
            submitFile = findSubmitFile()
            self.assertTrue(submitFile is not None)
            self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD", noIndex=True)

            # Now clean-up
            removeCondorJobs()

            # Round 3: run again with an invalid whitelist
            # NOTE: After this point, the original two sets of jobs will be executing
            # The rest of the jobs should move to submitFailed
            createJobs(wl=["T2_US_Namibia"])
            assertNoRunningJobs()

            jobSubmitter = JobSubmitterPoller(config=config)
            jobSubmitter.algorithm()

            # The new jobs should be gone from the submission queue
            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs * 2)
            result = getJobsAction.execute(state="SubmitFailed", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs)

            # Now clean-up
            removeCondorJobs()

            # Round 4: run again with all sites blacklisted
            createJobs(bl=self.sites)
            assertNoRunningJobs()

            jobSubmitter = JobSubmitterPoller(config=config)
            jobSubmitter.algorithm()

            # Nothing may have reached condor
            nRunning = getCondorRunningJobs(self.user)
            self.assertEqual(nRunning, 0)

            # Jobs should be gone
            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs * 2)
            result = getJobsAction.execute(state="SubmitFailed", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs * 2)
        finally:
            # Final clean-up; also reached when one of the checks above fails
            removeCondorJobs()

        del jobSubmitter
        return

    @attr("integration")
    def testF_OverloadTest(self):
        """
        _OverloadTest_

        Test and see what happens if you put in more jobs
        than the sites can handle

        Every site gets a threshold of one slot for the task type "Silly";
        after submitting nSubs * nJobs "Silly" jobs exactly one job per site
        is expected to be executing and the rest to remain in Created.

        The condor jobs of the test user are removed even when a check fails.
        """

        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertThreshold(siteName=site, taskType="Silly", maxSlots=1)

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            type="Silly",
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        jobSubmitter = JobSubmitterPoller(config=config)

        try:
            # Actually run it
            jobSubmitter.algorithm()

            # Should be one job for each site
            nSites = len(self.sites)
            nRunning = getCondorRunningJobs(self.user)
            self.assertEqual(nRunning, nSites)

            getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
            result = getJobsAction.execute(state="Executing", jobType="Silly")
            self.assertEqual(len(result), nSites)
            result = getJobsAction.execute(state="Created", jobType="Silly")
            self.assertEqual(len(result), nJobs * nSubs - nSites)
        finally:
            # Now clean-up, whether or not the checks above passed
            command = ["condor_rm", self.user]
            pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
            pipe.communicate()

        del jobSubmitter

        return

    @attr("integration")
    def testG_IndexErrorTest(self):
        """
        _IndexErrorTest_

        Check to see you get proper indexes for the jobPackages
        if you have more jobs than you normally run at once.

        jobsPerWorker and collectionSize are both set to 1, and checkJDL is
        called with indexFlag=True so the expected job package batch follows
        the job id.

        The condor jobs of the test user are removed even when a check fails.
        """
        workloadName = "basicWorkload"

        workload = self.createTestWorkload()

        config = self.getConfig()
        config.JobSubmitter.jobsPerWorker = 1
        config.JobSubmitter.collectionSize = 1

        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site="se.T2_US_UCSD",
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Checked before the try block: when this fails nothing has been
        # submitted by the test yet, so no condor_rm is issued.
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)
        try:
            jobSubmitter.algorithm()

            # Keep a copy of the work directory in ./CacheDir
            if os.path.exists("CacheDir"):
                shutil.rmtree("CacheDir")
            shutil.copytree(self.testDir, "CacheDir")

            # Check that jobs are in the right state
            result = getJobsAction.execute(state="Created", jobType="Processing")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), nSubs * nJobs)

            # Check on the JDL: use the last directory entry containing "submit"
            submitFile = None
            for fileName in os.listdir(config.JobSubmitter.submitDir):
                if re.search("submit", fileName):
                    submitFile = fileName
            self.assertTrue(submitFile is not None)
            self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD", indexFlag=True)

            # Check to make sure we have running jobs
            nRunning = getCondorRunningJobs(self.user)
            self.assertEqual(nRunning, nJobs * nSubs)
        finally:
            # Now clean-up, whether or not the checks above passed
            command = ["condor_rm", self.user]
            pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
            pipe.communicate()

        del jobSubmitter
        return
Beispiel #33
0
    def testUpdateWorkers(self):
        """
        _testUpdateWorkers_

        Register two components with their workers, update a worker cycle on
        each of them and check the heartbeat info; then report an error for
        one worker and check that it shows up in the heartbeat info.
        """
        firstComp = HeartbeatAPI("testComponent1", pollInterval=60, heartbeatTimeout=600)
        firstComp.registerComponent()
        for workerName in ("testWorker1", "testWorker2"):
            firstComp.registerWorker(workerName)

        secondComp = HeartbeatAPI("testComponent2", pollInterval=30, heartbeatTimeout=300)
        secondComp.registerComponent()
        secondComp.registerWorker("testWorker21")

        firstComp.updateWorkerCycle("testWorker1", 1.001, None)
        secondComp.updateWorkerCycle("testWorker21", 1234.1, 100)
        firstInfo = firstComp.getHeartbeatInfo()
        secondInfo = secondComp.getHeartbeatInfo()

        # Only testWorker1 had a cycle recorded on the first component
        for entry in firstInfo:
            if entry['worker_name'] != 'testWorker1':
                self.assertEqual(entry["cycle_time"], 0)
                continue
            self.assertTrue(entry["cycle_time"] > 1.0)
        outcomes = [entry["outcome"] for entry in firstInfo]
        self.assertItemsEqual(outcomes, [None, None])
        errorMessages = [entry["error_message"] for entry in firstInfo]
        self.assertItemsEqual(errorMessages, [None, None])

        # The outcome passed in as the integer 100 is expected back as '100'
        self.assertEqual(round(secondInfo[0]["cycle_time"], 1), 1234.1)
        self.assertEqual(secondInfo[0]["outcome"], '100')
        self.assertEqual(secondInfo[0]["error_message"], None)

        # time to update workers with an error
        firstComp.updateWorkerError("testWorker2", "BAD JOB!!!")
        firstInfo = firstComp.getHeartbeatInfo()
        for entry in firstInfo:
            if entry['worker_name'] != 'testWorker2':
                continue
            self.assertTrue(entry["last_error"] > int(time.time() - 10))
            self.assertEqual(entry["state"], "Error")
            self.assertEqual(entry["error_message"], "BAD JOB!!!")
Beispiel #34
0
class JobSubmitterTest(EmulatedUnitTestCase):
    """
    _JobSubmitterTest_

    Test class for the JobSubmitterPoller
    """

    def setUp(self):
        """
        _setUp_

        Bring up everything a submitter test needs: database schema, couch
        databases, DAO factories, a work directory, a registered heartbeat,
        an agent configuration file and a mocked LogDB client attached to
        the current thread.
        """
        super(JobSubmitterTest, self).setUp()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        schemaModules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"]
        self.testInit.setSchema(customModules=schemaModules)
        couchSetups = (("jobsubmitter_t/jobs", "JobDump"),
                       ("jobsubmitter_t/fwjrs", "FWJRDump"),
                       ("wmagent_summary_t", "WMStats"))
        for couchDb, couchApp in couchSetups:
            self.testInit.setupCouch(couchDb, couchApp)

        # Both DAO factories share the logger and DB interface of this thread
        thisThread = threading.currentThread()
        sharedArgs = {'logger': thisThread.logger, 'dbinterface': thisThread.dbi}
        self.daoFactory = DAOFactory(package="WMCore.WMBS", **sharedArgs)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir", **sharedArgs)

        self.testDir = self.testInit.generateWorkDir()

        # Register the component with the heartbeat machinery
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        # Attach a mocked LogDB client to the thread, built from the test config
        testConfig = self.getConfig()
        thisThread.logdbClient = MockLogDB(testConfig.General.central_logdb_url,
                                           testConfig.Agent.hostName, logger=None)

    def tearDown(self):
        """
        _tearDown_

        Undo setUp: wipe the database, the work directory and the couch
        databases, delete the agent configuration file and detach the
        LogDB client from the current thread.
        """
        init = self.testInit
        init.clearDatabase()
        init.delWorkDir()
        init.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        threading.currentThread().logdbClient = None

    def setResourceThresholds(self, site, **options):
        """
        _setResourceThresholds_

        Insert a site into resource control, add one threshold per task
        type listed under 'tasks' and, when a 'state' is given, switch the
        site to that state. Called without options, a default set with the
        'Processing' and 'Merge' task types is used.
        """
        settings = options or {'state': 'Normal',
                               'runningSlots': 10,
                               'pendingSlots': 5,
                               'tasks': ['Processing', 'Merge'],
                               'Processing': {'pendingSlots': 5,
                                              'runningSlots': 10},
                               'Merge': {'pendingSlots': 2,
                                         'runningSlots': 5}}

        control = ResourceControl()
        control.insertSite(siteName=site, pnn='se.%s' % (site), ceName=site,
                           plugin="MockPlugin",
                           pendingSlots=settings['pendingSlots'],
                           runningSlots=settings['runningSlots'],
                           cmsName=site)

        # Each task type carries its own running/pending limits under its name
        for taskType in settings['tasks']:
            control.insertThreshold(siteName=site, taskType=taskType,
                                    maxSlots=settings[taskType]['runningSlots'],
                                    pendingSlots=settings[taskType]['pendingSlots'])

        if settings.get('state'):
            control.changeSiteState(site, settings.get('state'))

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None, wfPrio=1, changeState=None):
        """
        _createJobGroups_

        Create one workflow and, for it, nSubs subscriptions; every
        subscription gets its own fileset and a job group filled through
        makeNJobs. Returns the list of job groups.
        If changeState (a ChangeState instance) is given, the jobs of all
        groups are moved from 'new' to 'created' before returning.
        """
        workflowName = makeUUID() if name is None else name
        workflow = Workflow(spec=workloadSpec, owner="tapas",
                            name=workflowName, task="basicWorkload/Production",
                            priority=wfPrio)
        workflow.create()

        createdGroups = []
        for _ in range(nSubs):
            # fileset, LFNs and job names of one subscription share a fresh UUID
            subName = makeUUID()

            fileset = Fileset(name=subName)
            fileset.create()

            subscription = Subscription(fileset=fileset, workflow=workflow,
                                        type=taskType, split_algo="FileBased")
            subscription.create()

            group = JobGroup(subscription=subscription)
            group.create()

            self.makeNJobs(name=subName, task=task, nJobs=nJobs,
                           jobGroup=group, fileset=fileset,
                           sub=subscription.exists(), site=site)

            fileset.commit()
            group.commit()
            createdGroups.append(group)

        if changeState:
            for group in createdGroups:
                changeState.propagate(group.jobs, 'created', 'new')

        return createdGroups

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site):
        """
        _makeNJobs_

        Add nJobs files to the fileset, then create one WMBS job for every
        file in the fileset. Each job gets its own cache directory and a
        pickled copy of itself in <cache_dir>/job.pkl.

        :param name: prefix used for the file LFNs and the job names
        :param task: task object providing getPathName() and data.input.sandbox
        :param nJobs: number of files to add to the fileset
        :param jobGroup: job group the jobs are created in and added to
        :param fileset: fileset receiving the files (committed here)
        :param sub: subscription id, used in the cache directory name
        :param site: a site name, or a list of site names (may be empty)
        :return: tuple (last job created, last file created); an element is
                 None when nothing of that kind was created
        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        # Defaults so that a call which creates no file or no job returns
        # None for it instead of failing on an unbound local name
        testJob = None
        testFile = None

        for n in range(nJobs):
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024, events=10)
            fileset.addFile(testFile)

        fileset.commit()

        # With a list of sites the first one (if any) becomes the job location
        if isinstance(site, list):
            location = site[0] if site else None
        else:
            location = site

        for index, wmbsFile in enumerate(fileset.files, 1):
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(wmbsFile)
            testJob["location"] = location
            # a fresh set per job, so that jobs never share one mutable object
            testJob["possiblePSN"] = set(site) if isinstance(site, list) else set([site])
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['priority'] = 101
            testJob['numberOfCores'] = 1
            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # pickle output is binary data: write it in binary mode and let
            # the context manager close the file even if dumping fails
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(self):
        """
        _getConfig_

        Build the configuration used by the tests, starting from the one
        returned by testInit.getConfiguration() and adding the Agent,
        General, CoreDatabase, BossAir, JobSubmitter and JobStateMachine
        settings. The JobSubmitter component directory is created on disk.

        :return: the populated configuration object
        :raises OSError: if the component directory cannot be created and
                         does not already exist
        """

        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.component_("Agent")
        config.Agent.WMSpecDirectory = self.testDir
        config.Agent.agentName = 'testAgent'
        config.Agent.hostName = 'testAgent'
        config.Agent.componentName = self.componentName
        config.Agent.useHeartbeat = False

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)
        config.General.central_logdb_url = "http://localhost/testlogdb"
        config.General.ReqMgr2ServiceURL = "http://localhost/reqmgr2"

        # Now the CoreDatabase information, taken from the environment
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir and MockPlugin configuration
        config.section_("BossAir")
        config.BossAir.pluginNames = ['MockPlugin']
        # To test the CondorPlugin instead of the MockPlugin use:
        # config.BossAir.pluginNames = ['CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.nCondorProcesses = 1
        config.BossAir.section_("MockPlugin")
        config.BossAir.MockPlugin.fakeReport = os.path.join(getTestBase(),
                                                            'WMComponent_t/JobSubmitter_t',
                                                            "submit.sh")

        # JobSubmitter configuration
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'DEBUG'
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.submitScript = os.path.join(getTestBase(),
                                                        'WMComponent_t/JobSubmitter_t',
                                                        'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(self.testDir, 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.drainGraceTime = 2  # in seconds

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "jobsubmitter_t"
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t'

        # Needed, because this is a test: the component directory has to exist.
        # Tolerate a directory left by an earlier call, but do not hide other
        # failures (nor KeyboardInterrupt/SystemExit, as a bare except would).
        componentDir = config.JobSubmitter.componentDir
        try:
            os.makedirs(componentDir)
        except OSError:
            if not os.path.isdir(componentDir):
                raise

        return config

    def createTestWorkload(self, name='workloadTest'):
        """
        _createTestWorkload_

        Build the test workload, let TaskMaker process it into a directory
        called `name` below the test directory and remember the path of the
        pickled workload in self.workloadSpecPath.
        Returns the workload.
        """
        testSpec = testWorkload()

        workloadDir = os.path.join(self.testDir, name)
        maker = TaskMaker(testSpec, workloadDir)
        maker.skipSubscription = True
        maker.processWorkload()

        self.workloadSpecPath = os.path.join(workloadDir,
                                             "%s/WMSandbox/WMWorkload.pkl" % name)
        return testSpec

    def testA_BasicTest(self):
        """
        Simple submission test with the MockPlugin: every job created
        must end up "submitted" (Executing). The thresholds are set high
        enough that they never limit the submission.
        """
        site = "T2_US_UCSD"
        workload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        def releaseJobs(groups):
            """Move the jobs of all given groups from 'new' to 'created'"""
            for jobGroup in groups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        self.setResourceThresholds(site, pendingSlots=50, runningSlots=100, tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 50, 'runningSlots': 100},
                                   Merge={'pendingSlots': 50, 'runningSlots': 100})

        # First batch: 2 subscriptions with 20 Processing jobs each
        nSubs, nJobs = 2, 20
        releaseJobs(self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                         task=workload.getTask("ReReco"),
                                         workloadSpec=self.workloadSpecPath,
                                         site=site))

        allJobsDAO = self.daoFactory(classname="Jobs.GetAllJobs")

        def jobsIn(state, jobType):
            """List the jobs of the given type currently in the given state"""
            return allJobsDAO.execute(state=state, jobType=jobType)

        # Pre-submit check: everything is waiting in Created
        self.assertEqual(len(jobsIn('Created', "Processing")), nSubs * nJobs)

        submitter = JobSubmitterPoller(config=config)
        submitter.algorithm()

        # After one cycle all jobs have left Created and are Executing
        self.assertEqual(len(jobsIn('Created', "Processing")), 0)
        executing = jobsIn('Executing', "Processing")
        self.assertEqual(len(executing), nSubs * nJobs)

        # Every submitted job has to be located at the one site we configured
        locationDAO = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in executing:
            self.assertEqual(locationDAO.execute(jobid=jobId), [['T2_US_UCSD']])

        # A second cycle has nothing left to submit, the numbers stay put
        submitter.algorithm()
        self.assertEqual(len(jobsIn('Created', "Processing")), 0)
        self.assertEqual(len(jobsIn('Executing', "Processing")), nSubs * nJobs)

        # Second batch: 10 Merge jobs
        nSubs, nJobs = 1, 10
        releaseJobs(self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                         task=workload.getTask("ReReco"),
                                         workloadSpec=self.workloadSpecPath,
                                         site=site,
                                         taskType="Merge"))

        # They are available for submission, so run another cycle
        self.assertEqual(len(jobsIn('Created', "Merge")), nSubs * nJobs)
        submitter.algorithm()

        # The 10 Merge jobs must have been submitted as well
        self.assertEqual(len(jobsIn('Created', "Merge")), 0)
        self.assertEqual(len(jobsIn('Executing', "Merge")), nSubs * nJobs)

    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Exercise the threshold handling of the submitter: the pending and
        running limits of the whole site as well as the limits per task
        type at that site.
        """
        site = "T1_US_FNAL"
        workload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        def queueJobs(nSubs, nJobs, **extra):
            """Create job groups for the ReReco task at the test site"""
            return self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=self.workloadSpecPath,
                                        site=site, **extra)

        def releaseJobs(groups):
            """Move the jobs of all given groups from 'new' to 'created'"""
            for jobGroup in groups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        self.setResourceThresholds(site, pendingSlots=50, runningSlots=220, tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 45, 'runningSlots': 200},
                                   Merge={'pendingSlots': 10, 'runningSlots': 20, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        submitter = JobSubmitterPoller(config=config)

        nSubs, nJobs = 5, 10
        releaseJobs(queueJobs(nSubs, nJobs))

        allJobsDAO = self.daoFactory(classname="Jobs.GetAllJobs")

        def jobsIn(state, jobType):
            """List the jobs of the given type currently in the given state"""
            return allJobsDAO.execute(state=state, jobType=jobType)

        # Pre-submit check
        self.assertEqual(len(jobsIn('Created', "Processing")), nSubs * nJobs)

        submitter.algorithm()

        # The pending threshold of the Processing task (45) limits this cycle
        self.assertEqual(len(jobsIn('Created', "Processing")), 5)
        executing = jobsIn('Executing', "Processing")
        self.assertEqual(len(executing), 45)

        # Check assigned locations
        locationDAO = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in executing:
            self.assertEqual(locationDAO.execute(jobid=jobId), [['T1_US_FNAL']])

        # Another cycle must not submit anything: the jobs are still pending
        submitter.algorithm()
        self.assertEqual(len(jobsIn('Created', "Processing")), 5)
        self.assertEqual(len(jobsIn('Executing', "Processing")), 45)

        # Add 10 Merge jobs; only 5 fit before the global pending threshold of the site is hit
        releaseJobs(queueJobs(1, 10, taskType='Merge'))

        submitter.algorithm()
        self.assertEqual(len(jobsIn('Created', "Merge")), 5)
        self.assertEqual(len(jobsIn('Executing', "Merge")), 5)
        self.assertEqual(len(jobsIn('Created', "Processing")), 5)
        self.assertEqual(len(jobsIn('Executing', "Processing")), 45)

        # Running thresholds. Scenario:
        #  - create 300 more Processing jobs and 300 more Merge jobs
        #  - run 5 polling cycles, each time first marking all jobs that
        #    are Executing as Running
        # Expected outcome: Merge ends with 30 submitted and 280 queued,
        # Processing with 240 submitted and 110 queued. This covers all the
        # threshold dynamics, including Merge being preferred to Processing.
        bigBatch = queueJobs(1, 300)
        bigBatch.extend(queueJobs(1, 300, taskType='Merge'))
        releaseJobs(bigBatch)

        runJobLoader = self.baDaoFactory(classname="LoadByWMBSID")
        runJobStatusSetter = self.baDaoFactory(classname="SetStatus")

        for _ in range(5):
            binds = [{'id': jobId, 'retry_count': 0}
                     for jobId in allJobsDAO.execute(state='Executing')]
            runJobs = runJobLoader.execute(binds)
            runJobStatusSetter.execute([runJob['id'] for runJob in runJobs], 'Running')
            submitter.algorithm()

        self.assertEqual(len(jobsIn('Executing', 'Processing')), 240)
        self.assertEqual(len(jobsIn('Created', 'Processing')), 110)
        self.assertEqual(len(jobsIn('Executing', 'Merge')), 30)
        self.assertEqual(len(jobsIn('Created', 'Merge')), 280)

    def testC_prioTest(self):
        """
        _testC_prioTest_

        Check the task priority, workflow priority and task id values
        returned by the Jobs.ListForSubmitter DAO for jobs of several task
        types and workflows, with and without a row limit.
        """
        firstWorkload = self.createTestWorkload(name='testWorkload1')
        secondWorkload = self.createTestWorkload(name='testWorkload2')
        thirdWorkload = self.createTestWorkload(name='testWorkload3')
        fourthWorkload = self.createTestWorkload(name='testWorkload4')

        config = self.getConfig()
        stateChanger = ChangeState(config)
        submitListDAO = self.daoFactory(classname="Jobs.ListForSubmitter")

        site = "T1_US_FNAL"
        self.setResourceThresholds(site, pendingSlots=1000, runningSlots=1000,
                                   tasks=['Processing', 'Merge', 'Production', 'Harvesting', 'LogCollect'],
                                   Processing={'pendingSlots': 1000, 'runningSlots': 1000},
                                   Merge={'pendingSlots': 1000, 'runningSlots': 10000},
                                   Production={'pendingSlots': 1000, 'runningSlots': 1000},
                                   Harvesting={'pendingSlots': 1000, 'runningSlots': 1000},
                                   LogCollect={'pendingSlots': 1000, 'runningSlots': 1000})

        def queueJobs(workload, **extra):
            """Create one subscription with 5 jobs for the ReReco task of a workload"""
            return self.createJobGroups(nSubs=1, nJobs=5,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=self.workloadSpecPath,
                                        site=site, **extra)

        def releaseJobs(groups):
            """Move the jobs of all given groups from 'new' to 'created'"""
            for jobGroup in groups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        def column(rows, key):
            """Integer values of one field over all rows returned by the DAO"""
            return [int(row[key]) for row in rows]

        # Three task types for the same workload, default workflow priority
        batch = queueJobs(firstWorkload, name='OldestWorkflow')  # task_id = 1
        batch.extend(queueJobs(firstWorkload, taskType='Merge'))  # task_id = 2
        batch.extend(queueJobs(firstWorkload, taskType='LogCollect'))  # task_id = 3
        releaseJobs(batch)

        # retrieve all 15 jobs created so far
        rows = submitListDAO.execute(limitRows=100)
        self.assertItemsEqual(column(rows, 'task_prio'), [4] * 5 + [2] * 5 + [0] * 5)
        self.assertItemsEqual(column(rows, 'wf_priority'), [1] * 15)
        self.assertItemsEqual(column(rows, 'task_id'), [2] * 5 + [3] * 5 + [1] * 5)

        # now retrieve only 6 jobs (5 Merge and 1 LogCollect), wf prio=1
        rows = submitListDAO.execute(limitRows=6)
        self.assertItemsEqual(column(rows, 'task_prio'), [4] * 5 + [2] * 1)

        # More jobs, this time from workflows with a higher priority
        batch = queueJobs(secondWorkload, wfPrio=2, taskType='Merge')  # task_id = 4
        batch.extend(queueJobs(thirdWorkload, wfPrio=3, taskType='Processing'))  # task_id = 5
        batch.extend(queueJobs(thirdWorkload, wfPrio=3, taskType='LogCollect'))  # task_id = 6
        releaseJobs(batch)

        # retrieve all 30 jobs created so far
        rows = submitListDAO.execute(limitRows=100)
        self.assertItemsEqual(column(rows, 'task_prio'), [4] * 10 + [2] * 10 + [0] * 10)
        # merge prio 2, merge prio 1, logCol prio 3, logCol prio 1, proc prio 3, proc prio 1
        self.assertItemsEqual(column(rows, 'wf_priority'),
                              [2] * 5 + [1] * 5 + [3] * 5 + [1] * 5 + [3] * 5 + [1] * 5)
        # merge id 4, merge id 2, logCol id 6, logCol id 3, proc id 5, proc id 1
        self.assertItemsEqual(column(rows, 'task_id'),
                              [4] * 5 + [2] * 5 + [6] * 5 + [3] * 5 + [5] * 5 + [1] * 5)

        # One more Merge subscription with workflow priority 2
        releaseJobs(queueJobs(fourthWorkload, wfPrio=2, taskType='Merge'))  # task_id = 7

        # retrieve all 15 Merge jobs created so far
        rows = submitListDAO.execute(limitRows=15)
        self.assertItemsEqual(column(rows, 'task_prio'), [4] * 15)
        # merge prio 2, merge prio 2, merge prio 1
        self.assertItemsEqual(column(rows, 'wf_priority'), [2] * 10 + [1] * 5)
        # merge id 7, merge id 4, merge id 2
        self.assertItemsEqual(column(rows, 'task_id'), [7] * 5 + [4] * 5 + [2] * 5)

    def testC_prioritization(self):
        """
        _testC_prioritization_

        Jobs have to be prioritized by job type first (Merge goes before
        Processing) and, within a type, by the oldest workflow.
        """
        site = "T1_US_FNAL"
        workload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        def queueJobs(**extra):
            """Create one subscription with 10 jobs for the ReReco task"""
            return self.createJobGroups(nSubs=1, nJobs=10,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=self.workloadSpecPath,
                                        site=site, **extra)

        def releaseJobs(groups):
            """Move the jobs of all given groups from 'new' to 'created'"""
            for jobGroup in groups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        self.setResourceThresholds(site, pendingSlots=10, runningSlots=10000, tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 50, 'runningSlots': 10000},
                                   Merge={'pendingSlots': 10, 'runningSlots': 10000, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        submitter = JobSubmitterPoller(config=config)

        firstBatch = queueJobs(name='OldestWorkflow')
        firstBatch.extend(queueJobs(taskType='Merge'))
        releaseJobs(firstBatch)

        submitter.algorithm()

        allJobsDAO = self.daoFactory(classname="Jobs.GetAllJobs")

        def jobsIn(state, jobType):
            """List the jobs of the given type currently in the given state"""
            return allJobsDAO.execute(state=state, jobType=jobType)

        # Merge goes first
        self.assertEqual(len(jobsIn('Created', "Merge")), 0)
        self.assertEqual(len(jobsIn('Executing', "Merge")), 10)
        self.assertEqual(len(jobsIn('Created', "Processing")), 10)
        self.assertEqual(len(jobsIn('Executing', "Processing")), 0)

        # New jobs under the old workflow name first, then jobs for a newer workflow
        releaseJobs(queueJobs(name='OldestWorkflow'))
        releaseJobs(queueJobs(name='NewestWorkflow'))

        runJobLoader = self.baDaoFactory(classname="LoadByWMBSID")
        runJobStatusSetter = self.baDaoFactory(classname="SetStatus")

        for cycle in (1, 2):
            # Move pending jobs to running
            binds = [{'id': jobId, 'retry_count': 0}
                     for jobId in allJobsDAO.execute(state='Executing')]
            runJobs = runJobLoader.execute(binds)
            runJobStatusSetter.execute([runJob['id'] for runJob in runJobs], 'Running')

            # Run again on created workflows
            submitter.algorithm()

            # Every cycle submits 10 more of the 30 Processing jobs
            self.assertEqual(len(jobsIn('Created', "Merge")), 0)
            self.assertEqual(len(jobsIn('Executing', "Merge")), 10)
            self.assertEqual(len(jobsIn('Created', "Processing")), 30 - cycle * 10)
            submitted = jobsIn('Executing', "Processing")
            self.assertEqual(len(submitted), cycle * 10)

            # Check that older workflow goes first even with newer jobs
            workflowTaskDAO = self.daoFactory(classname="Jobs.GetWorkflowTask")
            for entry in workflowTaskDAO.execute(submitted):
                self.assertEqual(entry['name'], 'OldestWorkflow')

    def testD_SubmitFailed(self):
        """
        _testD_SubmitFailed_

        Jobs created with an empty list of sites have nowhere to run and
        must end up in the SubmitFailed state.
        """
        nSubs, nJobs = 2, 10

        workload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        # site=[] leaves the jobs without any possible site
        siteLessGroups = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                              task=workload.getTask("ReReco"),
                                              site=[],
                                              workloadSpec=self.workloadSpecPath)
        for jobGroup in siteLessGroups:
            stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        submitter = JobSubmitterPoller(config=config)
        submitter.algorithm()

        # Jobs should go to submit failed
        allJobsDAO = self.daoFactory(classname="Jobs.GetAllJobs")
        failedJobs = allJobsDAO.execute(state='SubmitFailed', jobType="Processing")
        self.assertEqual(len(failedJobs), nSubs * nJobs)

    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Move a set of four sites through the Draining, Down and Aborted
        states and verify, after each submitter cycle, how many Processing
        and Merge jobs are in the Executing or SubmitFailed state.
        """
        wmWorkload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)
        procSubs, procJobs = 1, 20
        mergeSubs, mergeJobs = 1, 5

        siteNames = ['T2_US_Florida', 'T2_RU_INR', 'T3_CO_Uniandes', 'T1_US_FNAL']
        for siteName in siteNames:
            self.setResourceThresholds(siteName, pendingSlots=10, runningSlots=999999,
                                       tasks=['Processing', 'Merge'],
                                       Processing={'pendingSlots': 10, 'runningSlots': 999999},
                                       Merge={'pendingSlots': 10, 'runningSlots': 999999, 'priority': 5})

        siteControl = ResourceControl(config)

        def setAllSites(state):
            """Switch every test site to the given state."""
            for target in siteNames:
                siteControl.changeSiteState(target, state)

        def injectJobs(subs, jobs, **extra):
            """Create job groups that may run at any test site and mark their jobs as created."""
            newGroups = self.createJobGroups(nSubs=subs, nJobs=jobs,
                                             site=list(siteNames),
                                             task=wmWorkload.getTask("ReReco"),
                                             workloadSpec=self.workloadSpecPath,
                                             **extra)
            for jobGroup in newGroups:
                stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        # Phase 1: only T2_US_Florida is draining, all Processing jobs get submitted
        siteControl.changeSiteState('T2_US_Florida', 'Draining')
        injectJobs(procSubs, procJobs)
        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        executing = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(executing), procSubs * procJobs)

        # ... and none of them may have been assigned to the draining site
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for entry in getLocationAction.execute([{'jobid': jobId} for jobId in executing]):
            self.assertNotEqual(entry['site_name'], 'T2_US_Florida')

        # Phase 2: every site Down, a new batch of Processing jobs is not submitted
        setAllSites('Down')
        injectJobs(procSubs, procJobs)
        jobSubmitter.algorithm()
        # The Executing count is unchanged from phase 1
        executing = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(executing), procSubs * procJobs)

        # Phase 3: every site Draining, Merge jobs are still submitted
        setAllSites('Draining')
        injectJobs(mergeSubs, mergeJobs, taskType='Merge')
        jobSubmitter.algorithm()

        executingMerge = getJobsAction.execute(state='Executing', jobType='Merge')
        self.assertEqual(len(executingMerge), mergeSubs * mergeJobs)

        # Phase 4: every site Aborted, a new batch of Merge jobs fails submission
        setAllSites('Aborted')
        injectJobs(mergeSubs, mergeJobs, taskType='Merge')
        jobSubmitter.algorithm()

        failedMerge = getJobsAction.execute(state='SubmitFailed', jobType='Merge')
        self.assertEqual(len(failedMerge), mergeSubs * mergeJobs)
        executing = getJobsAction.execute(state='Executing', jobType='Processing')
        self.assertEqual(len(executing), procSubs * procJobs)

    def testJobSiteDrain(self):
        """
        _testJobSiteDrain_

        Submit jobs that can only run at one site, put that site in
        Draining and check that the still-created jobs first stay put and,
        after a short wait and another cycle, end up in submitfailed.
        """
        wmWorkload = self.createTestWorkload()
        config = self.getConfig()
        jobSubmitter = JobSubmitterPoller(config=config)
        siteControl = ResourceControl(config)
        stateChanger = ChangeState(config)
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def processingJobsIn(state):
            """Number of Processing jobs currently in the given state."""
            return len(getJobsAction.execute(state=state, jobType="Processing"))

        subscriptionCount = 1
        jobsPerSubscription = 30
        drainSite = 'T2_US_Nebraska'

        self.setResourceThresholds(drainSite, pendingSlots=100, runningSlots=100,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 10, 'runningSlots': 10},
                                   Merge={'pendingSlots': 10, 'runningSlots': 10, 'priority': 5})

        groups = self.createJobGroups(nSubs=subscriptionCount, nJobs=jobsPerSubscription,
                                      site=[drainSite],
                                      task=wmWorkload.getTask("ReReco"),
                                      workloadSpec=self.workloadSpecPath)
        for jobGroup in groups:
            stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        # First cycle: only 10 of the 30 jobs are expected to be submitted
        jobSubmitter.algorithm()
        self.assertEqual(processingJobsIn('Executing'), 10)

        siteControl.changeSiteState(drainSite, 'Draining')

        # Second cycle with the site in drain: nothing new is submitted and
        # nothing has failed yet
        jobSubmitter.algorithm()
        self.assertEqual(processingJobsIn('Executing'), 10)
        self.assertEqual(processingJobsIn('created'), 20)
        self.assertEqual(processingJobsIn('submitfailed'), 0)

        # Wait before the next cycle (presumably longer than the configured
        # drain grace period -- TODO confirm)
        time.sleep(3)
        jobSubmitter.algorithm()

        # The 20 leftover jobs have moved from created to submitfailed
        self.assertEqual(processingJobsIn('Executing'), 10)
        self.assertEqual(processingJobsIn('submitfailed'), 20)
        self.assertEqual(processingJobsIn('created'), 0)

    @attr('integration')
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Profile a single submitter cycle over two sets of 100 x 100 jobs
        (the second set of type Merge), then print the elapsed wall-clock
        time and the cProfile statistics sorted by cumulative time.
        """
        wmWorkload = self.createTestWorkload()
        # This local has to stay named 'config': the profiled statement
        # string below resolves it through locals().
        config = self.getConfig()
        stateChanger = ChangeState(config)

        subsPerSet = 100
        jobsPerSub = 100
        targetSite = "T1_US_FNAL"

        self.setResourceThresholds(targetSite, pendingSlots=20000, runningSlots=999999,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 10000, 'runningSlots': 999999},
                                   Merge={'pendingSlots': 10000, 'runningSlots': 999999, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        JobSubmitterPoller(config=config)

        allGroups = self.createJobGroups(nSubs=subsPerSet, nJobs=jobsPerSub,
                                         task=wmWorkload.getTask("ReReco"),
                                         workloadSpec=self.workloadSpecPath,
                                         site=targetSite)
        mergeGroups = self.createJobGroups(nSubs=subsPerSet, nJobs=jobsPerSub,
                                           task=wmWorkload.getTask("ReReco"),
                                           workloadSpec=self.workloadSpecPath,
                                           site=targetSite,
                                           taskType='Merge')
        allGroups.extend(mergeGroups)

        for jobGroup in allGroups:
            stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        # Time and profile one full cycle of a freshly built poller
        began = time.time()
        cProfile.runctx("JobSubmitterPoller(config=config).algorithm()", globals(), locals(), filename="testStats.stat")
        elapsed = time.time() - began

        print("Job took %f seconds to complete" % elapsed)

        pstats.Stats('testStats.stat').sort_stats('cumulative').print_stats()

    @attr('integration')
    def testMemoryProfile(self):
        """
        _testMemoryProfile_

        Run nine submitter cycles in a row, injecting a new batch of
        nSubs * nJobs jobs before each of the first four.

        Meant to be used with the memory_profiler library, which
        unfortunately requires decorating the source code under inspection.
        NOTE: Never run it on jenkins
        """
        wmWorkload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        subscriptionCount = 20
        jobsPerSubscription = 100
        cyclesWithNewJobs = 4
        cyclesWithoutNewJobs = 5

        candidateSites = ['T2_US_Florida', 'T2_RU_INR', 'T3_CO_Uniandes', 'T1_US_FNAL']

        # Thresholds are set for every site CRIC reports, not only the four above
        for knownSite in CRIC().PSNtoPNNMap('*'):
            self.setResourceThresholds(knownSite, pendingSlots=20000, runningSlots=999999,
                                       tasks=['Processing', 'Merge'],
                                       Processing={'pendingSlots': 10000, 'runningSlots': 999999},
                                       Merge={'pendingSlots': 10000, 'runningSlots': 999999, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        # Cycles 1-4: each one starts with a new batch of jobs.
        # (Site state changes between cycles used to be sketched here as
        # disabled code; none of them was ever executed.)
        for _ in range(cyclesWithNewJobs):
            self.createJobGroups(nSubs=subscriptionCount, nJobs=jobsPerSubscription, wfPrio=10,
                                 task=wmWorkload.getTask("ReReco"),
                                 workloadSpec=self.workloadSpecPath,
                                 site=list(candidateSites), changeState=changeState)
            jobSubmitter.algorithm()

        # Cycles 5-9: no new jobs are injected any more
        for _ in range(cyclesWithoutNewJobs):
            jobSubmitter.algorithm()
Beispiel #35
0
    def setUp(self):
        """
        Prepare the test: database schema, couch databases, the WMBS DAO
        factory, the resource-control sites and thresholds, a test user, a
        work directory and two registered heartbeat components.
        """
        myThread = threading.currentThread()

        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        # tearDown is invoked before the schema is installed
        self.tearDown()
        schemaModules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"]
        init.setSchema(customModules=schemaModules, useDefault=False)
        for couchName, couchApp in (("bossair_t/jobs", "JobDump"), ("bossair_t/fwjrs", "FWJRDump")):
            init.setupCouch(couchName, couchApp)

        self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site, pnn="se.%s" % (site), cmsName=site, ceName=site,
                                       plugin="CondorPlugin", pendingSlots=1000, runningSlots=2000)
            resourceControl.insertThreshold(siteName=site, taskType="Processing", maxSlots=1000, pendingSlots=1000)

        # Extra sites, one per additional plugin: (site/CE name, PNN, plugin, slots)
        pluginSites = (
            ("Xanadu", "se.Xanadu", "TestPlugin", 10000),
            ("jade-cms.hip.fi", "madhatter.csc.fi", "ARCPlugin", 100),
            # using this for glite submissions
            ("grid-ce-01.ba.infn.it", "storm-se-01.ba.infn.it", "gLitePlugin", 50),
        )
        for extraName, extraPnn, pluginName, slotLimit in pluginSites:
            # NOTE(review): cmsName reuses 'site', i.e. the last entry of
            # self.sites left over from the loop above -- confirm this is intended.
            resourceControl.insertSite(siteName=extraName, pnn=extraPnn, cmsName=site,
                                       ceName=extraName, plugin=pluginName)
            resourceControl.insertThreshold(siteName=extraName, taskType="Processing",
                                            maxSlots=slotLimit, pendingSlots=slotLimit)

        # Create user
        self.daoFactory(classname="Users.New").execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        # Change this to the working dir to keep track of error and log files from condor
        self.testDir = init.generateWorkDir()

        # Set heartbeat for the two components
        self.heartbeatAPI = HeartbeatAPI("test")
        self.heartbeatAPI.registerComponent()
        self.heartbeatAPI2 = HeartbeatAPI("JobTracker")
        self.heartbeatAPI2.registerComponent()
Beispiel #36
0
class JobSubmitterTest(EmulatedUnitTestCase):
    """
    _JobSubmitterTest_

    Test class for the JobSubmitterPoller
    """
    def setUp(self):
        """
        _setUp_

        Set up logging, the database schema, the couch databases, both DAO
        factories, a work directory, the component heartbeat, the emulator
        config file and a mock LogDB client attached to the current thread.
        """
        super(JobSubmitterTest, self).setUp()

        init = TestInit(__file__)
        self.testInit = init
        init.setLogging()
        init.setDatabaseConnection()
        schemaModules = ["WMCore.WMBS", "WMCore.BossAir",
                         "WMCore.ResourceControl", "WMCore.Agent.Database"]
        init.setSchema(customModules=schemaModules)

        couchDatabases = (("jobsubmitter_t/jobs", "JobDump"),
                          ("jobsubmitter_t/fwjrs", "FWJRDump"),
                          ("wmagent_summary_t", "WMStats"))
        for couchName, couchApp in couchDatabases:
            init.setupCouch(couchName, couchApp)

        thread = threading.currentThread()
        daoArgs = dict(logger=thread.logger, dbinterface=thread.dbi)
        self.daoFactory = DAOFactory(package="WMCore.WMBS", **daoArgs)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir", **daoArgs)

        self.testDir = init.generateWorkDir()

        # Heartbeat registration for the component under test
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        # getConfig() reads self.testDir and self.componentName set above
        agentConfig = self.getConfig()
        thread.logdbClient = MockLogDB(agentConfig.General.central_logdb_url,
                                       agentConfig.Agent.hostName,
                                       logger=None)

    def tearDown(self):
        """
        _tearDown_

        Clear the database, delete the work directory, tear down couch,
        delete the emulator config file and reset the LogDB client of the
        current thread to None.
        """
        init = self.testInit
        init.clearDatabase()
        init.delWorkDir()
        init.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        threading.currentThread().logdbClient = None

    def setResourceThresholds(self, site, **options):
        """
        _setResourceThresholds_

        Insert a MockPlugin site into resource control together with one
        threshold per task type.

        Without keyword options a default set is used (state 'Normal',
        Processing and Merge tasks). When options are given they must hold
        'pendingSlots', 'runningSlots', 'tasks' and, for every task listed,
        a dict with 'pendingSlots' and 'runningSlots'. Other keys of a
        per-task dict (e.g. 'priority') are not read here. A truthy 'state'
        option is applied to the site at the end.
        """
        defaults = {
            'state': 'Normal',
            'runningSlots': 10,
            'pendingSlots': 5,
            'tasks': ['Processing', 'Merge'],
            'Processing': {'pendingSlots': 5, 'runningSlots': 10},
            'Merge': {'pendingSlots': 2, 'runningSlots': 5},
        }
        settings = options or defaults

        control = ResourceControl()
        control.insertSite(siteName=site,
                           pnn='se.%s' % (site),
                           ceName=site,
                           plugin="MockPlugin",
                           pendingSlots=settings['pendingSlots'],
                           runningSlots=settings['runningSlots'],
                           cmsName=site)

        for taskType in settings['tasks']:
            taskLimits = settings[taskType]
            control.insertThreshold(siteName=site,
                                    taskType=taskType,
                                    maxSlots=taskLimits['runningSlots'],
                                    pendingSlots=taskLimits['pendingSlots'])

        siteState = settings.get('state')
        if siteState:
            control.changeSiteState(site, siteState)

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None):
        """
        _createJobGroups_

        Create one workflow and, for it, nSubs subscriptions. Each
        subscription gets its own fileset and job group, filled with nJobs
        jobs through makeNJobs. Returns the list of job groups.

        The workflow is named after 'name', or a fresh UUID when name is
        None; every fileset gets a UUID name of its own.
        """
        workflowName = makeUUID() if name is None else name
        workflow = Workflow(spec=workloadSpec,
                            owner="tapas",
                            name=workflowName,
                            task="basicWorkload/Production")
        workflow.create()

        createdGroups = []
        for _ in range(nSubs):
            filesetName = makeUUID()

            # Fileset -> Subscription -> JobGroup, each created in turn
            newFileset = Fileset(name=filesetName)
            newFileset.create()

            newSubscription = Subscription(fileset=newFileset,
                                           workflow=workflow,
                                           type=taskType,
                                           split_algo="FileBased")
            newSubscription.create()

            newJobGroup = JobGroup(subscription=newSubscription)
            newJobGroup.create()

            # Fill the group; the fileset name doubles as the job name prefix
            self.makeNJobs(name=filesetName,
                           task=task,
                           nJobs=nJobs,
                           jobGroup=newJobGroup,
                           fileset=newFileset,
                           sub=newSubscription.exists(),
                           site=site)

            newFileset.commit()
            newJobGroup.commit()
            createdGroups.append(newJobGroup)

        return createdGroups

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site):
        """
        _makeNJobs_

        Add nJobs files to the fileset, then create one job for every file
        in the fileset, each with its own cache directory holding a pickled
        copy of the job ('job.pkl').

        :param name: prefix used for the file LFNs and for the job names
        :param task: task object; its path name and input sandbox are
                     copied into every job
        :param nJobs: number of files to add to the fileset
        :param jobGroup: job group the jobs are created in and added to
        :param fileset: fileset that receives the new files
        :param sub: integer used for the 'Sub_<n>' cache directory name
        :param site: a site name or a list of site names; the first one (if
                     any) becomes the job location, all of them become the
                     job's possiblePSN set
        :return: tuple (last job created, last file created); an element is
                 None when nothing of that kind was created
        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        # Stay defined for the return statement even when nothing is created
        testJob = None
        testFile = None

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024,
                            events=10)
            fileset.addFile(testFile)

        fileset.commit()

        if isinstance(site, list):
            possibleSites = site
            location = site[0] if site else None
        else:
            possibleSites = [site]
            location = site

        for index, jobFile in enumerate(fileset.files, 1):
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(jobFile)
            testJob["location"] = location
            # Every job gets a set of its own
            testJob["possiblePSN"] = set(possibleSites)
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['priority'] = 101
            testJob['numberOfCores'] = 1
            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub),
                                    'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # pickle needs a binary file object (text mode fails on
            # Python 3); 'with' closes the handle even if the dump raises
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(self):
        """
        _getConfig_

        Build the agent configuration used by these tests, starting from
        the configuration provided by TestInit and filling in the Agent,
        General, CoreDatabase, BossAir (MockPlugin), JobSubmitter,
        JobStateMachine and TaskArchiver sections.

        Reads self.testDir and self.componentName as well as the TESTDIR,
        DATABASE, DBSOCK and COUCHURL environment variables, and makes sure
        the JobSubmitter component directory exists.

        :return: the populated configuration object
        :raises OSError: if the component directory cannot be created
        """

        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.component_("Agent")
        config.Agent.WMSpecDirectory = self.testDir
        config.Agent.agentName = 'testAgent'
        config.Agent.hostName = 'testAgent'
        config.Agent.componentName = self.componentName
        config.Agent.useHeartbeat = False

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)
        config.General.central_logdb_url = "http://localhost/testlogdb"

        # Now the CoreDatabase information
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir and MockPlugin configuration
        config.section_("BossAir")
        config.BossAir.pluginNames = ['MockPlugin']
        # Here Test the CondorPlugin instead of MockPlugin
        # config.BossAir.pluginNames = ['CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.nCondorProcesses = 1
        config.BossAir.section_("MockPlugin")
        config.BossAir.MockPlugin.fakeReport = os.path.join(
            getTestBase(), 'WMComponent_t/JobSubmitter_t', "submit.sh")

        # JobSubmitter configuration
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'DEBUG'
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.submitScript = os.path.join(
            getTestBase(), 'WMComponent_t/JobSubmitter_t', 'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(
            self.testDir, 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "jobsubmitter_t"
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t'

        # TaskArchive setup (JobSubmitter needs this)
        config.component_("TaskArchiver")
        config.TaskArchiver.ReqMgr2ServiceURL = "https://cmsweb-dev.cern.ch/reqmgr2"

        # Needed, because this is a test: the component directory has to
        # exist. getConfig may be called more than once per test, so an
        # already existing directory is fine; any other failure is real and
        # must not be hidden.
        componentDir = config.JobSubmitter.componentDir
        try:
            os.makedirs(componentDir)
        except OSError:
            if not os.path.isdir(componentDir):
                raise

        return config

    def createTestWorkload(self):
        """
        _createTestWorkload_

        Build the test workload, process it with a TaskMaker rooted at
        <testDir>/workloadTest (subscriptions skipped) and remember the
        resulting workload pickle path in self.workloadSpecPath.

        Returns the workload object.
        """
        workload = testWorkload()

        sandboxRoot = os.path.join(self.testDir, 'workloadTest')
        maker = TaskMaker(workload, sandboxRoot)
        maker.skipSubscription = True
        maker.processWorkload()

        self.workloadSpecPath = os.path.join(sandboxRoot,
                                             "TestWorkload/WMSandbox/WMWorkload.pkl")
        return workload

    def testA_BasicTest(self):
        """
        Simple end-to-end check with the MockPlugin: with thresholds large
        enough not to matter, every created Processing job, and later every
        created Merge job, must be Executing after a submitter cycle.
        """
        wmWorkload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        processingSubs, processingJobs = 2, 20
        mergeSubs, mergeJobs = 1, 10
        site = "T2_US_UCSD"

        roomyLimits = {'pendingSlots': 50, 'runningSlots': 100}
        self.setResourceThresholds(site,
                                   pendingSlots=50,
                                   runningSlots=100,
                                   tasks=['Processing', 'Merge'],
                                   Processing=dict(roomyLimits),
                                   Merge=dict(roomyLimits))

        groups = self.createJobGroups(nSubs=processingSubs,
                                      nJobs=processingJobs,
                                      task=wmWorkload.getTask("ReReco"),
                                      workloadSpec=self.workloadSpecPath,
                                      site=site)
        for jobGroup in groups:
            stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def jobCount(state, jobType):
            """Number of jobs of the given type currently in the given state."""
            return len(getJobsAction.execute(state=state, jobType=jobType))

        # Before submission everything sits in Created
        self.assertEqual(jobCount('Created', "Processing"), processingSubs * processingJobs)

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # After one cycle everything has moved to Executing
        self.assertEqual(jobCount('Created', "Processing"), 0)
        executing = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(executing), processingSubs * processingJobs)

        # Each executing job must be located at the only configured site
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in executing:
            self.assertEqual(getLocationAction.execute(jobid=jobId), [['T2_US_UCSD']])

        # A second cycle has nothing left to submit and changes nothing
        jobSubmitter.algorithm()
        self.assertEqual(jobCount('Created', "Processing"), 0)
        self.assertEqual(jobCount('Executing', "Processing"), processingSubs * processingJobs)

        # Inject 10 more jobs, this time of type Merge
        groups = self.createJobGroups(nSubs=mergeSubs,
                                      nJobs=mergeJobs,
                                      task=wmWorkload.getTask("ReReco"),
                                      workloadSpec=self.workloadSpecPath,
                                      site=site,
                                      taskType="Merge")
        for jobGroup in groups:
            stateChanger.propagate(jobGroup.jobs, 'created', 'new')

        # They are available for submission, then a cycle runs ...
        self.assertEqual(jobCount('Created', "Merge"), mergeSubs * mergeJobs)
        jobSubmitter.algorithm()

        # ... and all of them were submitted as well
        self.assertEqual(jobCount('Created', "Merge"), 0)
        self.assertEqual(jobCount('Executing', "Merge"), mergeSubs * mergeJobs)

    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(site,
                                   pendingSlots=50,
                                   runningSlots=220,
                                   tasks=['Processing', 'Merge'],
                                   Processing={
                                       'pendingSlots': 45,
                                       'runningSlots': 200
                                   },
                                   Merge={
                                       'pendingSlots': 10,
                                       'runningSlots': 20,
                                       'priority': 5
                                   })

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state,
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [['T1_US_FNAL']])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now put 10 Merge jobs, only 5 can be submitted, there we hit the global pending threshold for the site
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            taskType='Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 30 running 0 pending and processing is left at 240 running 0 pending
        # Processing has 110 jobs in queue and Merge 280
        # This tests all threshold dynamics including the prioritization of merge over processing
        nSubs = 1
        nJobs = 300
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        jobGroupList.extend(
            self.createJobGroups(nSubs=nSubs,
                                 nJobs=nJobs,
                                 task=workload.getTask("ReReco"),
                                 workloadSpec=self.workloadSpecPath,
                                 site=site,
                                 taskType='Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for i in range(5):
            result = getJobsAction.execute(state='Executing')
            binds = []
            for jobId in result:
                binds.append({'id': jobId, 'retry_count': 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')
            jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType='Processing')
        self.assertEqual(len(result), 240)
        result = getJobsAction.execute(state='Created', jobType='Processing')
        self.assertEqual(len(result), 110)
        result = getJobsAction.execute(state='Executing', jobType='Merge')
        self.assertEqual(len(result), 30)
        result = getJobsAction.execute(state='Created', jobType='Merge')
        self.assertEqual(len(result), 280)

        return

    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check the submission order: job type priority first (Merge ahead of
        Processing), then the oldest workflow ahead of newer ones.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        site = "T1_US_FNAL"

        processingLimits = {'pendingSlots': 50, 'runningSlots': 10000}
        mergeLimits = {'pendingSlots': 10, 'runningSlots': 10000, 'priority': 5}
        self.setResourceThresholds(site,
                                   pendingSlots=10,
                                   runningSlots=10000,
                                   tasks=['Processing', 'Merge'],
                                   Processing=processingLimits,
                                   Merge=mergeLimits)

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        def makeGroups(**extra):
            # One batch of job groups at the test site; extra selects the
            # task type and/or the workflow name.
            return self.createJobGroups(nSubs=nSubs,
                                        nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=self.workloadSpecPath,
                                        site=site,
                                        **extra)

        def markCreated(groups):
            # Move every job of the given groups from 'new' to 'created'
            for group in groups:
                changeState.propagate(group.jobs, 'created', 'new')

        firstBatch = makeGroups(name='OldestWorkflow')
        firstBatch.extend(makeGroups(taskType='Merge'))
        markCreated(firstBatch)

        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def checkCount(state, jobType, expected):
            # Assert how many jobs of a type sit in a state; hand back the
            # query result for callers that need the job ids.
            found = getJobsAction.execute(state=state, jobType=jobType)
            self.assertEqual(len(found), expected)
            return found

        # Merge goes first
        checkCount('Created', "Merge", 0)
        checkCount('Executing', "Merge", 10)
        checkCount('Created', "Processing", 10)
        checkCount('Executing', "Processing", 0)

        # More jobs for the old workflow first, then jobs for a newer workflow
        markCreated(makeGroups(name='OldestWorkflow'))
        markCreated(makeGroups(name='NewestWorkflow'))

        # Move pending jobs to running
        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for cycle in (1, 2):
            executingIds = getJobsAction.execute(state='Executing')
            binds = [{'id': jobId, 'retry_count': 0} for jobId in executingIds]
            runJobs = getRunJobID.execute(binds)
            setRunJobStatus.execute([runJob['id'] for runJob in runJobs],
                                    'Running')

            # Run again on created workflows
            jobSubmitter.algorithm()

            checkCount('Created', "Merge", 0)
            checkCount('Executing', "Merge", 10)
            checkCount('Created', "Processing", 30 - cycle * 10)
            executingProcessing = checkCount('Executing', "Processing",
                                             cycle * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(
                classname="Jobs.GetWorkflowTask")
            for workflow in getWorkflowAction.execute(executingProcessing):
                self.assertEqual(workflow['name'], 'OldestWorkflow')

    def testD_SubmitFailed(self):
        """
        _testD_SubmitFailed_

        Jobs created with an empty list of sites must end up in the
        SubmitFailed state after one polling cycle.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs, nJobs = 2, 10

        # site=[] leaves the jobs with no place to run
        unplaceableGroups = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            site=[],
            workloadSpec=self.workloadSpecPath)
        for jobGroup in unplaceableGroups:
            changeState.propagate(jobGroup.jobs, 'created', 'new')

        poller = JobSubmitterPoller(config=config)
        poller.algorithm()

        # Jobs should go to submit failed
        allJobsDAO = self.daoFactory(classname="Jobs.GetAllJobs")
        failedJobs = allJobsDAO.execute(state='SubmitFailed',
                                        jobType="Processing")
        self.assertEqual(len(failedJobs), nSubs * nJobs)

    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Exercise the submitter against sites in the Draining, Down and
        Aborted states and check which jobs get submitted in each case.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)
        nSubs = 1
        nJobs = 20

        sites = [
            'T2_US_Florida', 'T2_TW_Taiwan', 'T3_CO_Uniandes', 'T1_US_FNAL'
        ]
        for siteName in sites:
            self.setResourceThresholds(
                siteName,
                pendingSlots=10,
                runningSlots=-1,
                tasks=['Processing', 'Merge'],
                Processing={'pendingSlots': 10, 'runningSlots': -1},
                Merge={'pendingSlots': 10, 'runningSlots': -1, 'priority': 5})

        myResourceControl = ResourceControl(config)

        def setAllSites(state):
            # Put every site of the test into the same state
            for siteName in sites:
                myResourceControl.changeSiteState(siteName, state)

        def injectJobs(subs, jobs, **extra):
            # Create job groups that may run at any of the sites and move
            # their jobs from 'new' to 'created'
            groups = self.createJobGroups(nSubs=subs,
                                          nJobs=jobs,
                                          site=list(sites),
                                          task=workload.getTask("ReReco"),
                                          workloadSpec=self.workloadSpecPath,
                                          **extra)
            for group in groups:
                changeState.propagate(group.jobs, 'created', 'new')

        myResourceControl.changeSiteState('T2_US_Florida', 'Draining')
        # First test that we prefer Normal over drain, and T1 over T2/T3
        injectJobs(nSubs, nJobs)
        jobSubmitter = JobSubmitterPoller(config=config)
        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def checkCount(state, jobType, expected):
            # Assert the number of jobs in a state and return the query result
            found = getJobsAction.execute(state=state, jobType=jobType)
            self.assertEqual(len(found), expected)
            return found

        executing = checkCount('Executing', "Processing", nSubs * nJobs)

        # All jobs should be at either FNAL, Taiwan or Uniandes. It's a random selection
        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        locations = getLocationAction.execute(
            [{'jobid': jobId} for jobId in executing])
        for entry in locations:
            self.assertNotEqual(entry['site_name'], 'T2_US_Florida')

        # Now set everything to down, check we don't submit anything
        setAllSites('Down')
        injectJobs(nSubs, nJobs)
        jobSubmitter.algorithm()
        # Nothing is submitted despite the empty slots at Uniandes and Florida
        checkCount('Executing', "Processing", nSubs * nJobs)

        # Now set everything to Drain and create Merge jobs. Those should be submitted
        setAllSites('Draining')
        nSubsMerge = 1
        nJobsMerge = 5
        injectJobs(nSubsMerge, nJobsMerge, taskType='Merge')
        jobSubmitter.algorithm()
        checkCount('Executing', 'Merge', nSubsMerge * nJobsMerge)

        # Now set everything to Aborted, and create Merge jobs. Those should fail
        # since they can only run at one place
        setAllSites('Aborted')
        nSubsMerge = 1
        nJobsMerge = 5
        injectJobs(nSubsMerge, nJobsMerge, taskType='Merge')
        jobSubmitter.algorithm()

        checkCount('SubmitFailed', 'Merge', nSubsMerge * nJobsMerge)
        checkCount('Executing', 'Processing', nSubs * nJobs)

    @attr('integration')
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Profile one polling cycle over a large number of jobs, then print
        the elapsed time and the cProfile statistics sorted by cumulative
        time.
        """
        workload = self.createTestWorkload()
        # NOTE: the profiled statement below looks this name up via locals(),
        # so it must stay called "config".
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        site = "T1_US_FNAL"

        self.setResourceThresholds(
            site,
            pendingSlots=20000,
            runningSlots=-1,
            tasks=['Processing', 'Merge'],
            Processing={'pendingSlots': 10000, 'runningSlots': -1},
            Merge={'pendingSlots': 10000, 'runningSlots': -1, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        JobSubmitterPoller(config=config)

        def makeGroups(**extra):
            # One batch of nSubs x nJobs jobs at the test site
            return self.createJobGroups(nSubs=nSubs,
                                        nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=self.workloadSpecPath,
                                        site=site,
                                        **extra)

        allGroups = makeGroups()
        allGroups.extend(makeGroups(taskType='Merge'))
        for jobGroup in allGroups:
            changeState.propagate(jobGroup.jobs, 'created', 'new')

        # Actually run it, under the profiler
        statsFile = "testStats.stat"
        startTime = time.time()
        cProfile.runctx("JobSubmitterPoller(config=config).algorithm()",
                        globals(),
                        locals(),
                        filename=statsFile)
        elapsed = time.time() - startTime

        print("Job took %f seconds to complete" % elapsed)

        profile = pstats.Stats(statsFile)
        profile.sort_stats('cumulative')
        profile.print_stats()
Beispiel #37
0
class JobSubmitterTest(unittest.TestCase):
    """
    _JobSubmitterTest_

    Test class for the JobSubmitterPoller
    """

    def setUp(self):
        """
        _setUp_

        Standard setup: Now with 100% more couch

        Initialises logging, the database connection and schema, the three
        couch databases used by the tests, the WMBS and BossAir DAO
        factories, a work directory, the component heartbeat and the
        emulator configuration file.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"]
        )
        self.testInit.setupCouch("jobsubmitter_t/jobs", "JobDump")
        self.testInit.setupCouch("jobsubmitter_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")

        # threading.currentThread() is a deprecated camelCase alias (emits a
        # DeprecationWarning since Python 3.10); current_thread() is the
        # supported spelling and returns the same object.
        myThread = threading.current_thread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir", logger=myThread.logger, dbinterface=myThread.dbi)

        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        self.componentName = "JobSubmitter"
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Kept so that tearDown can delete the generated config file
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return

    def tearDown(self):
        """
        _tearDown_

        Run the TestInit cleanup steps (clearDatabase, delWorkDir,
        tearDownCouch) and delete the emulator config file recorded in
        self.configFile.
        """
        testInit = self.testInit
        testInit.clearDatabase()
        testInit.delWorkDir()
        testInit.tearDownCouch()

        EmulatorSetup.deleteConfig(self.configFile)

    def setResourceThresholds(self, site, **options):
        """
        _setResourceThresholds_

        Insert a site into resource control together with one threshold per
        task type listed in options["tasks"]. When no options are passed a
        small default setup (state Normal, Processing and Merge tasks) is
        used. The site state is changed only if a "state" option is set.
        """
        settings = options or {
            "state": "Normal",
            "runningSlots": 10,
            "pendingSlots": 5,
            "tasks": ["Processing", "Merge"],
            "Processing": {"pendingSlots": 5, "runningSlots": 10},
            "Merge": {"pendingSlots": 2, "runningSlots": 5},
        }

        control = ResourceControl()
        control.insertSite(
            siteName=site,
            pnn="se.%s" % site,
            ceName=site,
            plugin="MockPlugin",
            pendingSlots=settings["pendingSlots"],
            runningSlots=settings["runningSlots"],
            cmsName=site,
        )

        # Per-task limits live under a key named after the task type
        for taskType in settings["tasks"]:
            limits = settings[taskType]
            control.insertThreshold(
                siteName=site,
                taskType=taskType,
                maxSlots=limits["runningSlots"],
                pendingSlots=limits["pendingSlots"],
            )

        siteState = settings.get("state")
        if siteState:
            control.changeSiteState(site, siteState)

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site, taskType="Processing", name=None):
        """
        _createJobGroups_

        Create one workflow (named `name`, or a fresh UUID when name is
        None) and, for each of the nSubs subscriptions, a fileset, a
        subscription of type taskType and a job group filled through
        makeNJobs. Returns the list of job groups.
        """
        workflowName = makeUUID() if name is None else name

        workflow = Workflow(spec=workloadSpec, owner="tapas", name=workflowName, task="basicWorkload/Production")
        workflow.create()

        groups = []
        for _ in range(nSubs):
            # Each subscription gets its own uniquely named fileset; the same
            # name prefixes the jobs and files made for it
            filesetName = makeUUID()

            fileset = Fileset(name=filesetName)
            fileset.create()

            subscription = Subscription(
                fileset=fileset, workflow=workflow, type=taskType, split_algo="FileBased"
            )
            subscription.create()

            jobGroup = JobGroup(subscription=subscription)
            jobGroup.create()

            self.makeNJobs(
                name=filesetName,
                task=task,
                nJobs=nJobs,
                jobGroup=jobGroup,
                fileset=fileset,
                sub=subscription.exists(),
                site=site,
            )

            fileset.commit()
            jobGroup.commit()
            groups.append(jobGroup)

        return groups

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site):
        """
        _makeNJobs_

        Add nJobs files to the fileset, then create one WMBS job for every
        file in the fileset. Each job gets its own cache directory under
        <testDir>/CacheDir/Sub_<sub>/Job_<index> holding a pickled copy of
        the job (job.pkl).

        :param name: prefix for the file LFNs and the job names
        :param task: task object; getPathName() and data.input.sandbox are read
        :param nJobs: number of files to add to the fileset
        :param jobGroup: job group the jobs are created in and added to
        :param fileset: fileset that receives the files and drives job creation
        :param sub: subscription id, used in the cache directory name
        :param site: a site name, or a list of site names (the first entry
                     becomes the job location, None for an empty list)
        :return: (last job created, last file created); an element is None
                 when no job / no file was created
        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, "CacheDir")

        # Initialised so the final return is well defined even when nJobs is
        # 0 or the fileset is empty (previously an UnboundLocalError)
        testFile = None
        testJob = None

        for n in range(nJobs):
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n), size=1024, events=10)
            fileset.addFile(testFile)

        fileset.commit()

        # Normalise site to a list: first entry is the location, all entries
        # are the possible sites
        siteList = site if isinstance(site, list) else [site]
        location = siteList[0] if siteList else None

        for index, f in enumerate(fileset.files, start=1):
            testJob = Job(name="%s-%i" % (name, index))
            testJob.addFile(f)
            testJob["location"] = location
            # A fresh set per job so jobs never share one mutable object
            testJob["possiblePSN"] = set(siteList)
            testJob["task"] = task.getPathName()
            testJob["sandbox"] = task.data.input.sandbox
            testJob["spec"] = os.path.join(self.testDir, "basicWorkload.pcl")
            testJob["mask"]["FirstEvent"] = 101
            testJob["priority"] = 101
            testJob["numberOfCores"] = 1
            jobCache = os.path.join(cacheDir, "Sub_%i" % (sub), "Job_%i" % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob["cache_dir"] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # pickle writes bytes: the file must be opened in binary mode
            # (text mode fails on Python 3). The context manager closes the
            # handle even if dumping raises.
            with open(os.path.join(jobCache, "job.pkl"), "wb") as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(self):
        """
        _getConfig_

        Build the configuration used by the tests.

        Starts from the configuration returned by TestInit and fills in the
        Agent, General, CoreDatabase, BossAir (MockPlugin), JobSubmitter and
        JobStateMachine sections. Database and couch settings are read from
        the DATABASE, DBSOCK and COUCHURL environment variables.

        Side effect: creates the JobSubmitter component directory on disk.
        os.makedirs is called without exist_ok, so it raises if the
        directory already exists.

        Returns the populated configuration object.
        """

        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # Agent-level settings; the component name is the one set in setUp
        config.component_("Agent")
        config.Agent.WMSpecDirectory = self.testDir
        config.Agent.agentName = "testAgent"
        config.Agent.componentName = self.componentName
        config.Agent.useHeartbeat = False

        # General section: TESTDIR overrides the generated test directory
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)

        # CoreDatabase connection details (None when the variables are unset)
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir and MockPlugin configuration
        config.section_("BossAir")
        config.BossAir.pluginNames = ["MockPlugin"]
        # To test the CondorPlugin instead of the MockPlugin, use:
        # config.BossAir.pluginNames = ['CondorPlugin']
        config.BossAir.pluginDir = "WMCore.BossAir.Plugins"
        config.BossAir.multicoreTaskTypes = ["MultiProcessing", "MultiProduction"]
        config.BossAir.nCondorProcesses = 1
        config.BossAir.section_("MockPlugin")
        config.BossAir.MockPlugin.fakeReport = os.path.join(getTestBase(), "WMComponent_t/JobSubmitter_t", "submit.sh")

        # JobSubmitter configuration
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = "DEBUG"
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.submitScript = os.path.join(getTestBase(), "WMComponent_t/JobSubmitter_t", "submit.sh")
        config.JobSubmitter.componentDir = os.path.join(self.testDir, "Components")
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200

        # JobStateMachine: same couch database names as set up in setUp
        config.component_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL")
        config.JobStateMachine.couchDBName = "jobsubmitter_t"
        config.JobStateMachine.jobSummaryDBName = "wmagent_summary_t"

        # Needed, because this is a test: the component directory is created here
        os.makedirs(config.JobSubmitter.componentDir)

        return config

    def createTestWorkload(self, workloadName="Tier1ReReco"):
        """
        _createTestWorkload_

        Build the test workload called workloadName, run the TaskMaker on it
        (with subscription creation skipped) under <testDir>/workloadTest,
        and return the workload.
        """
        testLoad = testWorkload(workloadName)

        workloadDir = os.path.join(self.testDir, "workloadTest")
        maker = TaskMaker(testLoad, workloadDir)
        maker.skipSubscription = True
        maker.processWorkload()

        return testLoad

    def testA_BasicTest(self):
        """
        Basic MockPlugin submission test: with thresholds larger than the
        number of jobs, every created job must end up in Executing at the
        requested site.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        site = "T2_US_UCSD"
        taskLimits = {"pendingSlots": 50, "runningSlots": 100}

        self.setResourceThresholds(
            site,
            pendingSlots=50,
            runningSlots=100,
            tasks=["Processing", "Merge"],
            Processing=dict(taskLimits),
            Merge=dict(taskLimits),
        )

        def injectJobs(subs, jobs, **extra):
            # Create job groups at the test site and move their jobs from
            # 'new' to 'created'
            groups = self.createJobGroups(
                nSubs=subs,
                nJobs=jobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", "basicWorkload"),
                site=site,
                **extra
            )
            for jobGroup in groups:
                changeState.propagate(jobGroup.jobs, "created", "new")

        nSubs, nJobs = 2, 20
        injectJobs(nSubs, nJobs)

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        def checkCount(state, jobType, expected):
            # Assert the number of jobs in a state and return the query result
            found = getJobsAction.execute(state=state, jobType=jobType)
            self.assertEqual(len(found), expected)
            return found

        # Do pre-submit check
        checkCount("Created", "Processing", nSubs * nJobs)

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        checkCount("Created", "Processing", 0)
        executing = checkCount("Executing", "Processing", nSubs * nJobs)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in executing:
            self.assertEqual(getLocationAction.execute(jobid=jobId), [["T2_US_UCSD"]])

        # Run another cycle, it shouldn't submit anything. There isn't anything to submit
        jobSubmitter.algorithm()
        checkCount("Created", "Processing", 0)
        checkCount("Executing", "Processing", nSubs * nJobs)

        # Submit another 10 jobs, this time of type Merge
        nSubs, nJobs = 1, 10
        injectJobs(nSubs, nJobs, taskType="Merge")

        # Check that the jobs are available for submission and run another cycle
        checkCount("Created", "Merge", nSubs * nJobs)
        jobSubmitter.algorithm()

        # Check that the last 10 jobs were submitted as well.
        checkCount("Created", "Merge", 0)
        checkCount("Executing", "Merge", nSubs * nJobs)

    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(
            site,
            pendingSlots=50,
            runningSlots=200,
            tasks=["Processing", "Merge"],
            Processing={"pendingSlots": 45, "runningSlots": -1},
            Merge={"pendingSlots": 10, "runningSlots": 20, "priority": 5},
        )

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=site,
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state,
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [["T1_US_FNAL"]])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now put 10 Merge jobs, only 5 can be submitted, there we hit the global pending threshold for the site
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=site,
            taskType="Merge",
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        jobSubmitter.algorithm()
        result = getJobsAction.execute(state="Created", jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state="Executing", jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 25 running 0 pending and processing is left at 215 running 0 pending
        # Processing has 135 jobs in queue and Merge 285
        # This tests all threshold dynamics including the prioritization of merge over processing
        nSubs = 1
        nJobs = 300
        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=site,
        )
        jobGroupList.extend(
            self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
                site=site,
                taskType="Merge",
            )
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for _ in range(5):
            result = getJobsAction.execute(state="Executing")
            binds = []
            for jobId in result:
                binds.append({"id": jobId, "retry_count": 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x["id"] for x in runJobIds], "Running")
            jobSubmitter.algorithm()

        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), 215)
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 135)
        result = getJobsAction.execute(state="Executing", jobType="Merge")
        self.assertEqual(len(result), 25)
        result = getJobsAction.execute(state="Created", jobType="Merge")
        self.assertEqual(len(result), 285)

        return

    def testC_prioritization(self):
        """
        _testC_prioritization_

        Verify the submission order: Merge jobs are submitted ahead of
        Processing jobs, and Processing jobs of the oldest workflow are
        submitted ahead of those of a newer workflow.
        """
        specName = "basicWorkload"
        testWorkload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        nSubs = 1
        nJobs = 10
        site = "T1_US_FNAL"

        processingLimits = {"pendingSlots": 50, "runningSlots": -1}
        mergeLimits = {"pendingSlots": 10, "runningSlots": -1, "priority": 5}
        self.setResourceThresholds(
            site,
            pendingSlots=10,
            runningSlots=-1,
            tasks=["Processing", "Merge"],
            Processing=processingLimits,
            Merge=mergeLimits,
        )

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        def makeGroups(**extra):
            """Create one batch of nSubs x nJobs jobs for the ReReco task at the test site."""
            return self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=testWorkload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", specName),
                site=site,
                **extra
            )

        def moveToCreated(groups):
            """Propagate the jobs of every group from 'new' to 'created'."""
            for jobGroup in groups:
                stateChanger.propagate(jobGroup.jobs, "created", "new")

        # One Processing batch (oldest workflow) plus one Merge batch
        groups = makeGroups(name="OldestWorkflow")
        groups.extend(makeGroups(taskType="Merge"))
        moveToCreated(groups)

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        expectedCounts = (
            ("Created", "Merge", 0),
            ("Executing", "Merge", 10),
            ("Created", "Processing", 10),
            ("Executing", "Processing", 0),
        )
        for state, jobType, expected in expectedCounts:
            jobIds = getJobsAction.execute(state=state, jobType=jobType)
            self.assertEqual(len(jobIds), expected)

        # Create a newer workflow processing, and after some new jobs for an old workflow
        groups = makeGroups(name="NewestWorkflow")
        groups.extend(makeGroups(name="OldestWorkflow"))
        moveToCreated(groups)

        # Move pending jobs to running
        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for cycle in (1, 2):
            executing = getJobsAction.execute(state="Executing")
            binds = [{"id": jobId, "retry_count": 0} for jobId in executing]
            runJobs = getRunJobID.execute(binds)
            setRunJobStatus.execute([runJob["id"] for runJob in runJobs], "Running")

            # Run again on created workflows
            jobSubmitter.algorithm()

            jobIds = getJobsAction.execute(state="Created", jobType="Merge")
            self.assertEqual(len(jobIds), 0)
            jobIds = getJobsAction.execute(state="Executing", jobType="Merge")
            self.assertEqual(len(jobIds), 10)
            jobIds = getJobsAction.execute(state="Created", jobType="Processing")
            self.assertEqual(len(jobIds), 30 - cycle * 10)
            processingIds = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(processingIds), cycle * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(classname="Jobs.GetWorkflowTask")
            for entry in getWorkflowAction.execute(processingIds):
                self.assertEqual(entry["name"], "OldestWorkflow")

        return

    def testD_SubmitFailed(self):
        """
        _testD_SubmitFailed_

        Jobs created with an empty list of possible sites must all end up
        in the SubmitFailed state after one submitter cycle.
        """
        nSubs, nJobs = 2, 10

        testWorkload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)

        specPath = os.path.join(self.testDir, "workloadTest", "basicWorkload")
        jobGroups = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=testWorkload.getTask("ReReco"),
            site=[],
            workloadSpec=specPath,
        )
        for jobGroup in jobGroups:
            stateChanger.propagate(jobGroup.jobs, "created", "new")

        JobSubmitterPoller(config=config).algorithm()

        # Jobs should go to submit failed
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        failedIds = getJobsAction.execute(state="SubmitFailed", jobType="Processing")
        self.assertEqual(len(failedIds), nSubs * nJobs)

        return

    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Exercise the submitter against the different site states:
        Draining sites are avoided for Processing jobs, Down sites receive
        nothing, Draining sites still accept Merge jobs, and Merge jobs
        fail submission once every site is Aborted.
        """
        specName = "basicWorkload"
        testWorkload = self.createTestWorkload()
        config = self.getConfig()
        stateChanger = ChangeState(config)
        nSubs = 1
        nJobs = 20
        nProcessing = nSubs * nJobs

        sites = ["T2_US_Florida", "T2_TW_Taiwan", "T3_CO_Uniandes", "T1_US_FNAL"]
        for site in sites:
            self.setResourceThresholds(
                site,
                pendingSlots=10,
                runningSlots=-1,
                tasks=["Processing", "Merge"],
                Processing={"pendingSlots": 10, "runningSlots": -1},
                Merge={"pendingSlots": 10, "runningSlots": -1, "priority": 5},
            )

        myResourceControl = ResourceControl(config)

        def queueJobs(subs, jobs, **extra):
            """Create subs x jobs jobs that may run at any test site and move them to 'created'."""
            groups = self.createJobGroups(
                nSubs=subs,
                nJobs=jobs,
                site=list(sites),
                task=testWorkload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", specName),
                **extra
            )
            for jobGroup in groups:
                stateChanger.propagate(jobGroup.jobs, "created", "new")

        def setAllSites(state):
            """Switch every test site to the given state."""
            for siteName in sites:
                myResourceControl.changeSiteState(siteName, state)

        # First test that we prefer Normal over drain, and T1 over T2/T3
        myResourceControl.changeSiteState("T2_US_Florida", "Draining")
        queueJobs(nSubs, nJobs)
        jobSubmitter = JobSubmitterPoller(config=config)
        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        executingIds = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(executingIds), nProcessing)

        # All jobs should be at either FNAL, Taiwan or Uniandes. It's a random selection
        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        locations = getLocationAction.execute([{"jobid": jobId} for jobId in executingIds])
        for entry in locations:
            self.assertNotEqual(entry["site_name"], "T2_US_Florida")

        # Now set everything to down, check we don't submit anything
        setAllSites("Down")
        queueJobs(nSubs, nJobs)
        jobSubmitter.algorithm()
        # Nothing is submitted despite the empty slots at Uniandes and Florida
        executingIds = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(executingIds), nProcessing)

        # Now set everything to Drain and create Merge jobs. Those should be submitted
        setAllSites("Draining")
        nSubsMerge = 1
        nJobsMerge = 5
        queueJobs(nSubsMerge, nJobsMerge, taskType="Merge")
        jobSubmitter.algorithm()

        mergeIds = getJobsAction.execute(state="Executing", jobType="Merge")
        self.assertEqual(len(mergeIds), nSubsMerge * nJobsMerge)

        # Now set everything to Aborted, and create Merge jobs. Those should fail
        # since they can only run at one place
        setAllSites("Aborted")
        nSubsMerge = 1
        nJobsMerge = 5
        queueJobs(nSubsMerge, nJobsMerge, taskType="Merge")
        jobSubmitter.algorithm()

        mergeIds = getJobsAction.execute(state="SubmitFailed", jobType="Merge")
        self.assertEqual(len(mergeIds), nSubsMerge * nJobsMerge)
        executingIds = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(executingIds), nProcessing)

        return

    @attr("integration")
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Submit a lot of jobs and test how long it takes for
        them to actually be submitted.

        Creates 100 x 100 Processing jobs and 100 x 100 Merge jobs at
        T1_US_FNAL, runs one JobSubmitterPoller cycle under cProfile (the
        raw statistics are written to "testStats.stat" in the current
        working directory), then prints the wall-clock time and the
        profile sorted by cumulative time.
        """

        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        site = "T1_US_FNAL"

        self.setResourceThresholds(
            site,
            pendingSlots=20000,
            runningSlots=-1,
            tasks=["Processing", "Merge"],
            Processing={"pendingSlots": 10000, "runningSlots": -1},
            Merge={"pendingSlots": 10000, "runningSlots": -1, "priority": 5},
        )

        # Always initialize the submitter after setting the sites, flaky!
        JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=site,
        )

        jobGroupList.extend(
            self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
                site=site,
                taskType="Merge",
            )
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Actually run it. The profiled statement is a string, so it picks up
        # `config` and `JobSubmitterPoller` through the locals()/globals() passed in.
        startTime = time.time()
        cProfile.runctx("JobSubmitterPoller(config=config).algorithm()", globals(), locals(), filename="testStats.stat")
        stopTime = time.time()

        # Call form of print: valid on Python 3 and prints the same single
        # string on Python 2 (the parentheses just group the expression).
        print("Job took %f seconds to complete" % (stopTime - startTime))

        p = pstats.Stats("testStats.stat")
        p.sort_stats("cumulative")
        p.print_stats()

        return
Beispiel #38
0
    def setUp(self):
        """
        Prepare the test environment: database schema, couch databases,
        resource-control sites with their Processing thresholds, a test
        user, a work directory and two heartbeat registrations.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # tearDown is invoked up front, presumably to clear leftovers from a
        # previous run before the schema is (re)created
        self.tearDown()
        schemaModules = [
            "WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl",
            "WMCore.Agent.Database"
        ]
        self.testInit.setSchema(customModules=schemaModules, useDefault=False)
        for couchDb, couchApp in (("bossair_t/jobs", "JobDump"),
                                  ("bossair_t/fwjrs", "FWJRDump")):
            self.testInit.setupCouch(couchDb, couchApp)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site,
                                       pnn='se.%s' % (site),
                                       cmsName=site,
                                       ceName=site,
                                       plugin="CondorPlugin",
                                       pendingSlots=1000,
                                       runningSlots=2000)
            resourceControl.insertThreshold(siteName=site, taskType='Processing',
                                            maxSlots=1000, pendingSlots=1000)

        # Extra sites: (site/CE name, pnn, plugin, max and pending slots).
        # The last one is used for glite submissions.
        extraSites = (
            ('Xanadu', 'se.Xanadu', "TestPlugin", 10000),
            ('jade-cms.hip.fi', 'madhatter.csc.fi', "ARCPlugin", 100),
            ('grid-ce-01.ba.infn.it', 'storm-se-01.ba.infn.it', 'gLitePlugin', 50),
        )
        # NOTE(review): cmsName below is the last value of `site` from the loop
        # above (NameError if self.sites is empty) - confirm this is intended.
        for extraName, extraPnn, extraPlugin, slots in extraSites:
            resourceControl.insertSite(siteName=extraName,
                                       pnn=extraPnn,
                                       cmsName=site,
                                       ceName=extraName,
                                       plugin=extraPlugin)
            resourceControl.insertThreshold(siteName=extraName, taskType='Processing',
                                            maxSlots=slots, pendingSlots=slots)

        # Create user
        self.daoFactory(classname="Users.New").execute(dn="tapas",
                                                       group_name="phgroup",
                                                       role_name="cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        # Change this to the working dir to keep track of error and log files from condor
        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        self.heartbeatAPI = HeartbeatAPI('test')
        self.heartbeatAPI.registerComponent()
        self.heartbeatAPI2 = HeartbeatAPI('JobTracker')
        self.heartbeatAPI2.registerComponent()

        return
Beispiel #39
0
class Harness:
    """
    Harness class that wraps standard functionality used in all daemon
    components
    """

    def __init__(self, config, compName=None):
        """
        init

        The constructor is empty as we have an initalization method
        that can be called inside new threads (we use thread local attributes
        at startup.

        Default intialization of the harness including setting some diagnostic
        messages
        """
        self.config = config

        # component name is always the class name of child class
        if not compName:
            compName = self.__class__.__name__

        if not compName in (self.config.listComponents_() + self.config.listWebapps_()):
            raise WMException(WMEXCEPTION["WMCORE-8"] + compName, "WMCORE-8")
        if not hasattr(self.config, "Agent"):
            self.config.section_("Agent")

        self.config.Agent.componentName = compName
        compSect = getattr(self.config, compName, None)
        if compSect == None:
            # Then we have a major problem - there's no section with this name
            logging.error("Could not find section %s in config" % compName)
            logging.error("We are returning, and hoping you know what you're doing!")
            logging.debug("Config: %s" % self.config)
            return
        # check if componentDir is set if not assign.
        if getattr(compSect, "componentDir", None) == None:
            if not hasattr(self.config, "General"):
                # Don't do anything.  Assume the user knows what they are doing.
                logging.error("Missing componentDir and General section in config")
                logging.error("Going to trust you to know what you're doing.")
                return

            compSect.componentDir = os.path.join(
                self.config.General.workDir, "Components", self.config.Agent.componentName
            )
        # we have name and location of the log files. Now make sure there
        # is a directory.
        try:
            if not os.path.isdir(compSect.componentDir):
                os.makedirs(compSect.componentDir)
        except Exception as ex:
            logging.error("Encountered exception while making componentDirs: %s" % str(ex))
            logging.error("Ignoring")

        self.threadManagerName = ""
        self.heartbeatAPI = None
        self.messages = {}
        self.logMsg = {}

        return

    def initInThread(self):
        """
        Default initialization of the harness including setting some diagnostic
        messages. This method is called when we call 'prepareToStart'.

        In order, it: attaches a rotating log file handler to the root
        logger and sets the root log level, sets WMCORE_CACHE_DIR if it is
        not already set, stores logger, config and database objects as
        attributes on the current thread, creates (or resets) and pauses the
        worker thread manager, and records the SQL dialect on the thread.

        Raises:
            HarnessException: if the component section has neither a
                logFile nor a componentDir.
            Any exception raised in the body is logged (message and
            traceback) and then re-raised.
        """
        try:
            self.messages = {}

            compName = self.config.Agent.componentName
            compSect = getattr(self.config, compName, None)
            # Derive the log file from componentDir when none is configured
            if not hasattr(compSect, "logFile"):
                if not getattr(compSect, "componentDir", None):
                    errorMessage = "No componentDir for log entries found!\n"
                    errorMessage += "Harness cannot run without componentDir.\n"
                    logging.error(errorMessage)
                    raise HarnessException(errorMessage)
                compSect.logFile = os.path.join(compSect.componentDir, "ComponentLog")
            print("Log file is: " + compSect.logFile)
            # Assuming this is logging.handlers.RotatingFileHandler: append mode,
            # rotate at 1000000000 bytes, keep 3 backups - TODO confirm the import
            logHandler = RotatingFileHandler(compSect.logFile, "a", 1000000000, 3)
            logMsgFormat = getattr(
                compSect, "logMsgFormat", "%(asctime)s:%(thread)d:%(levelname)s:%(module)s:%(message)s"
            )
            logFormatter = logging.Formatter(logMsgFormat)
            logHandler.setFormatter(logFormatter)
            # The level name is resolved as an attribute of the logging module;
            # an unknown name raises AttributeError, handled by the except below
            logLevelName = getattr(compSect, "logLevel", "INFO")
            logLevel = getattr(logging, logLevelName)
            logging.getLogger().addHandler(logHandler)
            logging.getLogger().setLevel(logLevel)
            # NOTE(review): logging.SQLDEBUG is not a standard library level;
            # presumably it is registered by WMLogging - confirm
            self.logMsg = {
                "DEBUG": logging.DEBUG,
                "ERROR": logging.ERROR,
                "NOTSET": logging.NOTSET,
                "CRITICAL": logging.CRITICAL,
                "WARNING": logging.WARNING,
                "INFO": logging.INFO,
                "SQLDEBUG": logging.SQLDEBUG,
            }
            # Applies the configured level a second time, through the mapping above
            if hasattr(compSect, "logLevel") and compSect.logLevel in self.logMsg.keys():
                logging.getLogger().setLevel(self.logMsg[compSect.logLevel])
            WMLogging.sqldebug("wmcore level debug:")

            # If not previously set, force wmcore cache to current path
            if not os.environ.get("WMCORE_CACHE_DIR"):
                os.environ["WMCORE_CACHE_DIR"] = os.path.join(compSect.componentDir, ".wmcore_cache")

            logging.info(">>>Starting: " + compName + "<<<")
            # check which backend to use: MySQL, Oracle, etc... for core
            # services.
            # we recognize there can be more than one database,
            # but we offer a default database that is used for core services.
            logging.info(">>>Initializing default database")
            logging.info(">>>Check if connection is through socket")
            myThread = threading.currentThread()
            myThread.logger = logging.getLogger()
            logging.info(">>>Setting config for thread: ")
            myThread.config = self.config

            logging.info(">>>Building database connection string")
            # check if there is a premade string if not build it yourself.
            dbConfig = ConfigDBMap(self.config)
            dbStr = dbConfig.getDBUrl()
            options = dbConfig.getOption()
            # we only want one DBFactory per database so we will need to
            # to pass this on in case we are using threads.
            myThread.dbFactory = DBFactory(myThread.logger, dbStr, options)

            myThread.sql_transaction = True
            if myThread.dbFactory.engine:

                myThread.dbi = myThread.dbFactory.connect()
                myThread.transaction = Transaction(myThread.dbi)

            else:

                # No engine: dbi is set to the configured connect URL and
                # SQL transactions are switched off for this thread
                myThread.dbi = myThread.config.CoreDatabase.connectUrl
                myThread.sql_transaction = False

            # Attach a worker manager object to the main thread
            if not hasattr(myThread, "workerThreadManager"):
                myThread.workerThreadManager = WorkerThreadManager(self)
            else:
                myThread.workerThreadManager.terminateSlaves.clear()
            myThread.workerThreadManager.pauseWorkers()

            logging.info(">>>Initialize transaction dictionary")

            # Only the part of the URL before the first ':' is used; `junk` is unused
            (connectDialect, junk) = dbStr.split(":", 1)

            # myThread.dialect is left unset for any other scheme
            if connectDialect.lower() == "mysql":
                myThread.dialect = "MySQL"
            elif connectDialect.lower() == "oracle":
                myThread.dialect = "Oracle"
            elif connectDialect.lower() == "sqlite":
                myThread.dialect = "SQLite"

            logging.info("Harness part constructor finished")
        except Exception as ex:
            # Log the failure with its traceback, then let the caller handle it
            logging.critical("Problem instantiating " + str(ex))
            logging.error("Traceback: %s" % str(traceback.format_exc()))
            raise

    def preInitialization(self):
        """
        _preInitialization_

        returns: nothing

        method that can be overloaded and will be called before the
        start component is called. (enables you to set message->handler
        mappings). You use the self.message dictionary of the base class
        to define the mappings.

        """
        pass

    def postInitialization(self):
        """
        _postInitialization_

        returns: nothing

        method that can be overloaded and will be called after the start
        component does the standard initialization, but before the wait
        (enables you to publish events when starting up)

        Define actions you want to execute before the actual message
        handling starts. E.g.: publishing some messages, or removing
        messages.

        """
        pass

    def logState(self):
        """
        _logState_

        returns: string

        method that can be overloaded to log additional state information
        (should return atring)
        """
        msg = "No additional state information for " + self.config.Agent.componentName
        return msg

    def publishItem(self, items):
        """
        _publishItem_

        returns: nothing

        A method that publishes a (dictionary) set or 1 item
        to a monitoring service.
        """
        # FIXME: do we need this method. If so we need to agree
        # FIXME: on some default monitoring publication mechanism.
        pass

    def __call__(self, event, payload):
        """
        Once upon a time this was for doing the handling of diagnostic messages

        With the test-deprecating of the MsgService based diagnostics, we've basically
        scratched this.

        I'm leaving this in so at least the framework is still there

        -mnorman
        """
        return

    def initialization(self):
        """
        _initialization__

        Used the handle initializing the MsgService.  The MsgService
        is no longer used.

        Removed but not deleted, since all sorts of things call it
        """
        return

    def prepareToStart(self):
        """
        _prepareToStart_

        Aggregates the whole start-up procedure so that it can easily be
        used in tests: thread initialization, optional heartbeat
        registration, the pre/post initialization hooks (the latter inside
        the default transaction when SQL transactions are enabled) and
        resuming the worker threads. self.state goes from "initialize" to
        "active".

        returns: Nothing
        """
        self.state = "initialize"
        self.initInThread()

        # note: every component gets a (unique) name:
        # self.config.Agent.componentName
        agentSection = self.config.Agent
        logging.info(">>>Registering Component - %s" % agentSection.componentName)

        # Heartbeat registration is on unless the config switches it off
        if getattr(agentSection, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(agentSection.componentName)
            self.heartbeatAPI.registerComponent()

        logging.info(">>>Starting initialization")
        logging.info(">>>Setting default transaction")

        thisThread = threading.currentThread()

        self.preInitialization()

        if thisThread.sql_transaction:
            thisThread.transaction.begin()

        self.initialization()
        self.postInitialization()

        if thisThread.sql_transaction:
            thisThread.transaction.commit()

        logging.info(">>>Committing default transaction")
        logging.info(">>>Starting worker threads")
        thisThread.workerThreadManager.resumeWorkers()
        logging.info(">>>Initialization finished!\n")

        # wait for messages
        self.state = "active"

    def prepareToStop(self, wait=False, stopPayload=""):
        """
        _stopComponent

        Stops the component, including all worker threads. Allows call from
        test framework
        """
        # Stop all worker threads
        logging.info(">>>Terminating worker threads")
        myThread = threading.currentThread()
        try:
            myThread.workerThreadManager.terminateWorkers()
        except:
            # We may not have a thread manager
            pass

        if wait:
            logging.info(">>>Shut down of component " + "while waiting for threads to finish")
            # check if nr of threads is specified.
            activeThreads = 1
            if stopPayload != "":
                activeThreads = int(stopPayload)
                if activeThreads < 1:
                    activeThreads = 1
            while threading.activeCount() > activeThreads:
                logging.info(">>>Currently " + str(threading.activeCount()) + " threads active")
                logging.info(">>>Waiting for less then " + str(activeThreads) + " to be active")
                time.sleep(5)

    def handleMessage(self, type="", payload=""):
        """
        __handleMessage_

        Formerly used to handle messages - now non-functional
        Left here in case someone else is using it (i.e. PilotManager)
        """
        return

    def startDaemon(self, keepParent=False, compName=None):
        """
        Same result as start component, except that the component
        is started as a daemon, after which you can close your xterm
        and the process will still run.

        The keepParent option enables us to keep the parent process,
        which is used during testing.

        compName selects the config section whose componentDir is printed
        as the log location; it defaults to the class name. The daemon
        itself is created in the componentDir of the section named by
        config.Agent.componentName.
        """
        print("Starting %s as a daemon " % (self.config.Agent.componentName))

        sectionName = compName or self.__class__.__name__
        logSection = getattr(self.config, sectionName, None)
        print("Log will be in %s " % (logSection.componentDir))

        # put the daemon config file in the work dir of this component.
        # FIXME: this file will be replaced by a database table.
        daemonSection = getattr(self.config, self.config.Agent.componentName, None)
        pid = createDaemon(daemonSection.componentDir, keepParent)
        if pid != 0:
            # this is the parent: return control to the testing environment.
            return
        # this is not the parent: start the component
        self.startComponent()

    def startComponent(self):
        """
        _startComponent_

        returns: Nothing

        Start up the component, performs initialization and waits indefinitely
        Calling this method results in the application
        running in the xterm (not in daemon mode)

        Any Exception raised by prepareToStart() or while waiting is
        reported through print and logging.critical and then re-raised.
        When the component state is no longer "initialize", the stack trace
        is logged as an error, the current thread's transaction (if there
        is one) is rolled back and prepareToStop(False) is called before
        the error is reported.
        """
        myThread = threading.currentThread()
        try:
            # Placeholder that only feeds the post-mortem text below.
            msg = "None"
            self.prepareToStart()
            while True:
                time.sleep(360)

        except Exception as ex:
            initializing = self.state == "initialize"
            # Both branches report the same formatted stack frames.
            stackTrace = "".join(traceback.format_tb(sys.exc_info()[2], None))
            if initializing:
                errormsg = """PostMortem: choked when initializing with error: %s\n""" % (str(ex))
                errormsg += stackTrace
            else:
                logging.error(stackTrace)
                logging.error(">>>Fatal Error, Preparing to Rollback Transaction")
                # Identity check: a transaction object with custom
                # comparison operators must not be mistaken for None.
                if getattr(myThread, "transaction", None) is not None:
                    myThread.transaction.rollback()
                self.prepareToStop(False)
                # Same text as before, including its trailing blanks.
                errormsg = (
                    "\nPostMortem: choked while handling messages  with error: %s\n"
                    "while trying to handle msg: %s\n"
                    "                "
                ) % (
                    str(ex),
                    str(msg),
                )
            print(errormsg)
            logging.critical(errormsg)
            raise
        # The loop above has no break, so the lines below are only a
        # safety net.
        logging.info("System shutdown complete!")
        # this is to ensure exiting when in daemon mode.
        sys.exit()

    def __str__(self):
        """

        return: string

        Printable status report of this component: the message -> handler
        subscriptions, the configuration and, when logState() returns
        something other than an empty string, that additional state.
        """
        report = [
            "Status of this component : \n",
            "\n",
            ">>Event Subscriptions --> Handlers<<\n",
            "------------------------------------\n",
        ]
        for message in self.messages.keys():
            report.append(message + "-->" + str(self.messages[message]) + "\n")
        report.extend([
            "\n",
            "\n",
            ">>Parameters --> Values<<\n",
            "-------------------------\n",
            str(self.config),
        ])

        additionalMsg = self.logState()
        if additionalMsg != "":
            report.extend([
                "\n",
                "Additional state information\n",
                "----------------------------\n",
                "\n",
                str(additionalMsg),
                "\n",
            ])
        return "".join(report)
Beispiel #40
0
    def testAddComponent(self):
        """
        _testAddComponent_

        Test creation of components and worker threads as well as the
        get heartbeat DAOs: register two components, add workers to each,
        update the worker heartbeats and check what getHeartbeatInfo() and
        getAllHeartbeatInfo() report at every step.
        """
        # assertItemsEqual (Python 2.7) is called assertCountEqual on
        # Python 3; use whichever this unittest version provides.
        assertSameItems = getattr(self, "assertCountEqual", None) or self.assertItemsEqual

        comp1 = HeartbeatAPI("testComponent1", pollInterval=60, heartbeatTimeout=600)
        comp1.registerComponent()
        self.assertEqual(comp1.getHeartbeatInfo(), [])  # no worker thread yet

        comp1.registerWorker("testWorker1")
        self.assertEqual(len(comp1.getHeartbeatInfo()), 1)

        comp1.registerWorker("testWorker2")
        self.assertEqual(len(comp1.getHeartbeatInfo()), 2)

        comp2 = HeartbeatAPI("testComponent2", pollInterval=30, heartbeatTimeout=300)
        comp2.registerComponent()
        self.assertEqual(comp2.getHeartbeatInfo(), [])  # no worker thread yet
        self.assertEqual(len(comp2.getAllHeartbeatInfo()), 2)

        comp2.registerWorker("testWorker21")
        self.assertEqual(len(comp2.getHeartbeatInfo()), 1)
        self.assertEqual(len(comp2.getAllHeartbeatInfo()), 3)

        comp1.updateWorkerHeartbeat("testWorker1", "Running")
        comp1.updateWorkerHeartbeat("testWorker2", "Running")
        comp2.updateWorkerHeartbeat("testWorker21", "Running")
        self.assertEqual(len(comp1.getAllHeartbeatInfo()), 3)
        self.assertEqual(len(comp2.getAllHeartbeatInfo()), 3)

        comp1Res = comp1.getHeartbeatInfo()
        comp2Res = comp2.getHeartbeatInfo()
        self.assertEqual(len(comp1Res), 2)
        self.assertEqual(len(comp2Res), 1)

        # Row order is not relied upon, only the reported values.
        assertSameItems([item["name"] for item in comp1Res], ["testComponent1", "testComponent1"])
        assertSameItems([item["worker_name"] for item in comp1Res], ["testWorker1", "testWorker2"])
        assertSameItems([item["state"] for item in comp1Res], ["Running", "Running"])
        assertSameItems([item["poll_interval"] for item in comp1Res], [60, 60])
        assertSameItems([item["update_threshold"] for item in comp1Res], [600, 600])

        assertSameItems([item["name"] for item in comp2Res], ["testComponent2"])
        assertSameItems([item["worker_name"] for item in comp2Res], ["testWorker21"])
        assertSameItems([item["state"] for item in comp2Res], ["Running"])
        assertSameItems([item["poll_interval"] for item in comp2Res], [30])
        assertSameItems([item["update_threshold"] for item in comp2Res], [300])
class DBSUploadTest(unittest.TestCase):
    """
    _DBSUploadTest_

    TestCase for DBSUpload module
    """

    _maxMessage = 10

    def setUp(self):
        """
        _setUp_

        setUp function for unittest: installs the DBS3Buffer and agent
        database schema, sets up the couch database, adds three test
        locations, registers a heartbeat component and stores a small
        config document in the ConfigCache.  self.configURL ends up as
        "<COUCHURL>;;<couch db name>;;<config document id>".

        Requires the COUCHURL environment variable.
        """
        # Set constants
        self.couchDB      = "config_test"
        self.configURL    = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules =
                                ["WMComponent.DBS3Buffer",
                                 'WMCore.Agent.Database'],
                                useDefault = False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)
        self.buffer3Factory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                         logger = myThread.logger,
                                         dbinterface = myThread.dbi)

        locationAction = self.bufferFactory(classname = "DBSBufferFiles.AddLocation")
        locationAction.execute(siteName = "se1.cern.ch")
        locationAction.execute(siteName = "se1.fnal.gov")
        locationAction.execute(siteName = "malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI  = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDB)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        self.testDir = self.testInit.generateWorkDir()

        # Write the dummy PSet; the context manager closes the file even
        # when the write fails.
        psetPath = os.path.join(self.testDir, "PSet.txt")
        with open(psetPath, 'w') as psetFile:
            psetFile.write(self.configString)

        configCache.addConfig(newConfig = psetPath, psetHash = None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"],
                                         self.couchDB,
                                         configCache.getCouchID())
        return

    def tearDown(self):
        """
        _tearDown_

        tearDown function for unittest: clears the database, tears down
        couch and deletes the work directory, in that order.
        """
        harness = self.testInit
        harness.clearDatabase()
        harness.tearDownCouch()
        harness.delWorkDir()
        return

    def createConfig(self):
        """
        _createConfig_

        Build and return the Configuration object used by the component
        under test (General, Agent, CoreDatabase, DBSUpload, DBSInterface
        and Alert settings).
        """
        config = Configuration()

        # General settings: working directory
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())

        # Agent identity
        config.section_("Agent")
        for option, value in (("componentName", 'DBSUpload'),
                              ("useHeartbeat", False)):
            setattr(config.Agent, option, value)

        # CoreDatabase information (dialect, dburl, etc.), taken from the
        # environment
        config.section_("CoreDatabase")
        for option, variable in (("connectUrl", "DATABASE"),
                                 ("socket", "DBSOCK")):
            setattr(config.CoreDatabase, option, os.getenv(variable))

        # The component itself
        config.component_("DBSUpload")
        for option, value in (("pollInterval", 10),
                              ("logLevel", 'ERROR'),
                              ("maxThreads", 1),
                              ("namespace", 'WMComponent.DBSUpload.DBSUpload'),
                              ("componentDir", os.path.join(os.getcwd(), 'Components')),
                              ("workerThreads", 4)):
            setattr(config.DBSUpload, option, value)

        # DBS endpoints
        config.section_("DBSInterface")
        for option, value in (
                ("globalDBSUrl", 'http://vocms09.cern.ch:8880/cms_dbs_int_local_xx_writer/servlet/DBSServlet'),
                ("globalDBSVersion", 'DBS_2_0_9'),
                ("DBSUrl", 'http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet'),
                ("DBSVersion", 'DBS_2_0_9'),
                ("MaxFilesToCommit", 10)):
            setattr(config.DBSInterface, option, value)

        # Alerts messaging framework: work (alerts) and control channel
        # addresses to which the component will be sending alerts; these
        # are the destination addresses where AlertProcessor:Receiver
        # listens
        config.section_("Alert")
        for option, value in (("address", "tcp://127.0.0.1:5557"),
                              ("controlAddr", "tcp://127.0.0.1:5559")):
            setattr(config.Alert, option, value)

        # threshold of the DBS upload queue size alert
        # reference: trac ticket #1628
        config.DBSUpload.alertUploadQueueSize = 2000

        return config

    def injectWorkflow(self, workflowName = 'TestWorkflow',
                       taskPath = '/TestWorkflow/ReadingEvents',
                       MaxWaitTime  = 10000,
                       MaxFiles = 10,
                       MaxEvents = 250000000,
                       MaxSize = 9999999999):
        """
        _injectWorkflow_

        Inject a dummy workflow in DBSBuffer for testing by running the
        "InsertWorkflow" DAO with the given name, task path and limits.

        returns: whatever the DAO returns, i.e. the workflow ID
        """
        insertAction = self.buffer3Factory("InsertWorkflow")
        settings = (workflowName, taskPath,
                    MaxWaitTime, MaxFiles,
                    MaxEvents, MaxSize)
        return insertAction.execute(*settings)

    def getFiles(self, name, tier, nFiles = 12, site = "malpaquet", workflowName = None, taskPath = None,
                 noChild = False):
        """
        Create some quick dummy test files.

        A workflow is injected first (with workflowName/taskPath when both
        are given, otherwise with the defaults).  Then nFiles
        DBSBufferFile entries are created in dataset /name/name/tier at
        the given site.  Unless noChild is set, one extra file in
        /name/name_2/RECO is created with all of those files as parents.

        returns: the list of the nFiles parent files (the child file is
                 not part of it)
        """
        if workflowName is None or taskPath is None:
            workflowId = self.injectWorkflow()
        else:
            workflowId = self.injectWorkflow(workflowName = workflowName,
                                             taskPath = taskPath)

        parentFiles = []
        for index in range(nFiles):
            parent = DBSBufferFile(lfn = '%s-%s-%i' % (name, site, index), size = 1024,
                                   events = 20, checksums = {'cksum': 1}, workflowId = workflowId)
            parent.setAlgorithm(appName = name, appVer = "CMSSW_3_1_1",
                                appFam = "RECO", psetHash = "GIBBERISH",
                                configContent = self.configURL)
            parent.setDatasetPath("/%s/%s/%s" % (name, name, tier))
            # one lumi per file, numbered like the file itself
            parent.addRun(Run(1, index))
            parent.setGlobalTag("aGlobalTag")
            parent.create()
            parent.setLocation(site)
            parentFiles.append(parent)

        if noChild:
            return parentFiles

        child = DBSBufferFile(lfn = '%s-%s-child' % (name, site), size = 1024,
                              events = 10, checksums = {'cksum': 1},
                              workflowId = workflowId)
        child.setAlgorithm(appName = name, appVer = "CMSSW_3_1_1",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = self.configURL)
        child.setDatasetPath("/%s/%s_2/RECO" % (name, name))
        child.addRun(Run(1, 45))
        child.setGlobalTag("aGlobalTag")
        child.create()
        child.setLocation(site)

        parentLFNs = [parent['lfn'] for parent in parentFiles]
        child.addParents(parentLFNs)

        return parentFiles


    @attr('integration')
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Do everything simply once
        Create dataset, algo, files, blocks,
        upload them,
        mark as done, finish them, migrate them
        Also check the timeout

        The poller's algorithm() is run three times; after each round the
        dbsbuffer_block statuses and the contents of the local and global
        DBS references are checked.  Needs the DBS instances from
        createConfig() to be reachable (integration test).
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime = 3)
        config.DBSUpload.pollInterval  = 4

        # 12 parent files plus one child file (created by getFiles)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)


        # Load components that are necessary to check status
        # (dbinterface itself is not used again in this test)
        factory     = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS',), ('Open',)])

        # Check to see if datasets and algos are in local DBS
        result  = listAlgorithms(apiRef = localAPI, patternExe = name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['ExecutableName'], name)
        result  = listPrimaryDatasets(apiRef = localAPI, match = name)
        self.assertEqual(result, [name])
        # NOTE(review): the processed dataset listing is fetched but never asserted on
        result    = listProcessedDatasets(apiRef = localAPI, primary = name, dataTier = "*")

        # Then check and see that the closed block made it into local DBS
        # The order of the two blocks is not fixed, so both orders are accepted:
        # the closed block must hold 10 files, the open one 2
        affectedBlocks = listBlocks(apiRef = localAPI, datasetPath = datasetPath)
        if affectedBlocks[0]['OpenForWriting'] == '0':
            self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
        else:
            self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef = localAPI, datasetPath = datasetPath)
        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        # (listing the child dataset is expected to raise at this point)
        flag = False
        try:
            listDatasetFiles(apiRef = localAPI,
                             datasetPath = '/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)


        # There should be one block in global
        # It should be closed and hold either two or ten files
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 1)
        for block in result:
            self.assertEqual(block['OpenForWriting'], '0')
            self.assertTrue(block['NumberOfFiles'] in [2, 10])

        # Okay, deep breath.  First round done
        # In the second round, the second block of the parent fileset should transfer
        # Make sure that the timeout functions work
        # (the sleep is longer than the MaxWaitTime of 3 injected above)
        time.sleep(10)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',)])

        # Check to make sure all the files are in global
        result = listDatasetFiles(apiRef = globeAPI, datasetPath = datasetPath)
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef = localAPI,
                             datasetPath = '/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # Third round
        # Both of the parent blocks should have transferred
        # So the child block should now transfer
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('Open',)])


        # This time listing the child dataset must succeed and return one file
        flag = False
        try:
            result = listDatasetFiles(apiRef = localAPI,
                                      datasetPath = '/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertFalse(flag)

        self.assertEqual(len(result), 1)

        return


    @attr('integration')
    def testB_AlgoMigration(self):
        """
        _AlgoMigration_

        Test our ability to migrate multiple algos to global

        Do this by creating, mid-poll, two separate batches of files
        One with the same dataset but a different algo
        One with the same algo, but a different dataset
        See that they both get to global

        Only the number of blocks listed by the global DBS reference is
        asserted after each batch.
        """
        #raise nose.SkipTest
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime = 20)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)


        # Load components that are necessary to check status
        # (dbinterface and localAPI are not used again in this test)
        factory     = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)


        # First poll cycle, with only the files from getFiles() in the buffer
        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # There should now be one block
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 1)

        # Okay, by now, the first migration should have gone through.
        # Now create a second batch of files with the same dataset
        # but a different algo.
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn = '%s-batch2-%i' %(name, i), size = 1024,
                                     events = 20, checksums = {'cksum': 1},
                                     locations = "malpaquet")
            testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_3_1_1",
                                  appFam = tier, psetHash = "GIBBERISH_PART2",
                                  configContent = self.configURL)
            testFile.setDatasetPath(datasetPath)
            testFile.addRun(Run( 1, *[46]))
            testFile.create()


        # Have to do things twice to get parents
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be two blocks
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 2)


        # Now create another batch of files with the original algo
        # But in a different dataset
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn = '%s-batch3-%i' %(name, i), size = 1024,
                                     events = 20, checksums = {'cksum': 1},
                                     locations = "malpaquet")
            testFile.setAlgorithm(appName = name, appVer = "CMSSW_3_1_1",
                                  appFam = tier, psetHash = "GIBBERISH",
                                  configContent = self.configURL)
            testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
            testFile.addRun(Run( 1, *[46]))
            testFile.create()

        # Do it twice for parentage.
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()


        # There should now be one block in the new (_3) dataset
        result    = listBlocks(apiRef = globeAPI, datasetPath = '/%s/%s_3/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)


        # Well, all the blocks got there, so we're done
        return


    @attr('integration')
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks

        The poller is run with config.DBSUpload.abortStepTwo and then
        abortStepThree switched on (exceptions from those runs are
        ignored), and afterwards without any abort flag.  After every run
        the dbsbuffer_block / dbsbuffer_file statuses and, where relevant,
        the contents of the global DBS reference are checked.
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        # Remember the standard streams so they can be put back at the end;
        # this method itself never reassigns them.
        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config = config)
        localAPI     = dbsInterface.getAPIRef()
        globeAPI     = dbsInterface.getAPIRef(globalRef = True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name = name, tier = tier, nFiles = nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config = config)

        # The abort is allowed to surface as an exception; only the
        # database state afterwards matters.
        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Aborting in step two should result in no results
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 0)

        config.DBSUpload.abortStepTwo   = False
        config.DBSUpload.abortStepThree = True
        testDBSUpload = DBSUploadPoller(config = config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass


        # Aborting in step three leaves one pending and one open block,
        # with the files of dataset_algo 1 still marked READY
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('Pending',), ('Open',)])
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
        for res in result:
            self.assertEqual(res[0], 'READY')

        config.DBSUpload.abortStepThree     = False
        self.injectWorkflow(MaxWaitTime = 300)
        testDBSUpload = DBSUploadPoller(config = config)
        testDBSUpload.algorithm()

        # After this, one block should have been uploaded, one should still be open
        # This is the result of the pending block updating, and the open block staying open
        # Plain ints are used for the ids: they compare equal to the long
        # values on Python 2 and, unlike 3L / 4L, also parse on Python 3.
        result = myThread.dbi.processData("SELECT status, id FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', 3), ('Open', 4)])

        # Check that one block got there
        result    = listBlocks(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['NumberOfFiles'], 10)
        self.assertEqual(result[0]['NumberOfEvents'], 200)
        self.assertEqual(result[0]['BlockSize'], 10240)

        # Check that ten files got there
        result = listDatasetFiles(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 10)

        # Lower the wait time of every workflow to 1 and sleep past it
        # before polling again
        myThread.dbi.processData("UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 1")
        testDBSUpload = DBSUploadPoller(config = config)
        time.sleep(3)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',)])

        result = listDatasetFiles(apiRef = globeAPI, datasetPath = datasetPath)
        self.assertEqual(len(result), 12)

        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Next round: a third block shows up, still open
        testDBSUpload.algorithm()
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('Open',)])

        time.sleep(5)
        testDBSUpload.algorithm()
        time.sleep(2)
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS',), ('InGlobalDBS',), ('InGlobalDBS',)])

        # The single child file must now be listed in global
        result = listDatasetFiles(apiRef = globeAPI,
                                  datasetPath = '/%s/%s_2/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        sys.stdout = originalOut
        sys.stderr = originalErr

        return



    @attr('integration')
    def testD_Profile(self):
        """
        _Profile_

        Profile with cProfile and time various pieces.

        Currently switched off: the method returns before doing anything.
        """
        return
        # Not reached while the return above is in place.
        config = self.createConfig()

        sampleName = "ThisIsATest_%s" % (makeUUID())
        dataTier = "RECO"
        files = self.getFiles(name = sampleName, tier = dataTier, nFiles = 500)
        datasetPath = '/%s/%s/%s' % (sampleName, sampleName, dataTier)

        # runctx looks the poller up by this local name
        testDBSUpload = DBSUploadPoller(config = config)
        cProfile.runctx("testDBSUpload.algorithm()", globals(), locals(), filename = "testStats.stat")

        stats = pstats.Stats('testStats.stat')
        stats.sort_stats('cumulative')
        stats.print_stats(0.2)

        return

    @attr('integration')
    def testE_NoMigration(self):
        """
        _NoMigration_

        Test the DBSUpload system with no global migration: run a single
        poll cycle with config.DBSInterface.doGlobalMigration switched off
        and check the block and file statuses in the buffer tables.
        """
        currentThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime = 3)
        config.DBSInterface.doGlobalMigration = False
        config.DBSUpload.pollInterval = 4

        sampleName = "ThisIsATest_%s" % (makeUUID())
        dataTier = "RECO"
        files = self.getFiles(name = sampleName, tier = dataTier, nFiles = 12)
        datasetPath = '/%s/%s/%s' % (sampleName, sampleName, dataTier)

        # Load components that are necessary to check status
        uploadFactory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = uploadFactory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef = True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        poller = DBSUploadPoller(config = config)
        poller.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        blockRows = currentThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(blockRows), 2)
        self.assertEqual(blockRows, [('InGlobalDBS',), ('Open',)])

        # Every file of the first dataset/algo pair must be marked GLOBAL
        fileRows = currentThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
        for row in fileRows:
            self.assertEqual(row[0], 'GLOBAL')

        return

    @attr('integration')
    def testF_DBSUploadQueueSizeCheckForAlerts(self):
        """
        Run one poll cycle with the upload queue size alert threshold set
        lower than the number of files waiting in the buffer.

        Test will not trigger a real alert being sent unless doing some
        mocking of the methods used during DBSUploadPoller.algorithm() ->
        DBSUploadPoller.uploadBlocks() method.
        As done here, it probably can't be deterministic, yet the feature
        shall be checked.

        """
        sizeLevelToTest = 1
        currentThread = threading.currentThread()
        config = self.createConfig()
        # threshold / value to check
        config.DBSUpload.alertUploadQueueSize = sizeLevelToTest

        # without this uploadBlocks method returns immediately
        sampleName = "ThisIsATest_%s" % (makeUUID())
        dataTier = "RECO"
        files = self.getFiles(name = sampleName, tier = dataTier,
                              nFiles = sizeLevelToTest + 1)
        datasetPath = '/%s/%s/%s' % (sampleName, sampleName, dataTier)

        # load components that are necessary to check status
        # (this seems necessary, else some previous tests started failing)
        uploadFactory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = uploadFactory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config = config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef = True)
        poller = DBSUploadPoller(config)
        # this is finally where the action (alert) should be triggered from
        poller.algorithm()

        return

    def testG_closeSettingsPerWorkflow(self):
        """
        _closeSettingsPerWorkflow_

        Test our ability to close blocks depending on settings
        configured for individual workflows.
        This unit test that doesn't require an actual DBS instance to run.
        """
        self.assertTrue(False, 'This unit test disabled since we do not have DBS2 mock')
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSInterface.doGlobalMigration = False

        # First test, limit by number of files and timeout without new files
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        self.injectWorkflow(workflowName = name, taskPath = '/%s/Test' % name,
                            MaxFiles = 5)
        self.getFiles(name = name, tier = tier, nFiles = nFiles,
                              workflowName = name, taskPath = '/%s/Test' % name)

        # Load components that are necessary to check status
        factory     = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        # Change the DBSUploadPoller imports on runtime
        from WMComponent.DBSUpload import DBSUploadPoller as MockDBSUploadPoller
        #MockDBSUploadPoller.DBSInterface = DBS2Interface

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        # First run creates 3 blocks, 2 are closed immediately and one is open
        testDBSUpload = MockDBSUploadPoller.DBSUploadPoller(config = config)
        testDBSUpload.algorithm()
        openBlocks = dbinterface.findOpenBlocks()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 1)
        self.assertEqual(len(closedBlocks), 2)
        globalFiles = myThread.dbi.processData("SELECT id FROM dbsbuffer_file WHERE status = 'GLOBAL'")[0].fetchall()
        notUploadedFiles = myThread.dbi.processData("SELECT * FROM dbsbuffer_file WHERE status = 'NOTUPLOADED'")[0].fetchall()
        self.assertEqual(len(globalFiles), 12)
        self.assertEqual(len(notUploadedFiles), 1)
        self.assertTrue('child' in notUploadedFiles[0][1])
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 2)
        self.assertEqual(len(closedBlocks), 2)
        globalFiles = myThread.dbi.processData("SELECT id FROM dbsbuffer_file WHERE status = 'GLOBAL'")[0].fetchall()
        notUploadedFiles = myThread.dbi.processData("SELECT * FROM dbsbuffer_file WHERE status = 'NOTUPLOADED'")[0].fetchall()
        self.assertEqual(len(globalFiles), 13)
        self.assertEqual(len(notUploadedFiles), 0)
        # Test the timeout feature to close blocks
        myThread.dbi.processData("UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 0")
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 0)
        self.assertEqual(len(closedBlocks), 4)
        # Check the information that DBS received
        dbsBlocks = testDBSUpload.dbsInterface.blocks
        for dbsBlockName in dbsBlocks:
            dbsBlock = dbsBlocks[dbsBlockName]
            self.assertEqual(dbsBlock['OpenForWriting'], '0')
            self.assertTrue(dbsBlock['nFiles'] in (1,2,5))

        # Second test, limit by number of events and timeout with new files
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = 50
        self.injectWorkflow(workflowName = name, taskPath = '/%s/Test' % name,
                            MaxFiles = 45, MaxEvents = 800, MaxWaitTime = 10000)
        self.getFiles(name = name, tier = tier, nFiles = nFiles,
                              workflowName = name, taskPath = '/%s/Test' % name)
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 2)
        self.assertEqual(len(closedBlocks), 5)
        # Throw 20 new file
        # Reset the timer such that the blocks appear to have been created 10001 seconds ago
        creationTime = int(time.time() - 10001)
        myThread.dbi.processData("UPDATE dbsbuffer_block SET create_time = %d WHERE status != 'InGlobalDBS'" % creationTime)
        self.getFiles(name = name + '2', tier = tier, nFiles = 20,
                      workflowName = name, taskPath = '/%s/Test' % name,
                      noChild = True)
        # Now a new block will have to be created as the last one timed out
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 1)
        self.assertEqual(len(closedBlocks), 7)
        dbsBlocks = testDBSUpload.dbsInterface.blocks
        for dbsBlockName in dbsBlocks:
            dbsBlock = dbsBlocks[dbsBlockName]
            if name in dbsBlockName:
                if dbsBlock['OpenForWriting'] == '1':
                    self.assertEqual(dbsBlock['nFiles'], 20)
                else:
                    self.assertTrue(dbsBlock['events'] in (10,200,800))
                    self.assertTrue(dbsBlock['nFiles'] in (1,10,40))

        # Last test, check limitation by size
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = 10
        self.injectWorkflow(workflowName = name, taskPath = '/%s/Test' % name,
                            MaxFiles = 45, MaxEvents = 800, MaxSize = 2048)
        self.getFiles(name = name, tier = tier, nFiles = nFiles,
                              workflowName = name, taskPath = '/%s/Test' % name)
        testDBSUpload.algorithm()
        dbsBlocks = testDBSUpload.dbsInterface.blocks
        for dbsBlockName in dbsBlocks:
            dbsBlock = dbsBlocks[dbsBlockName]
            if name in dbsBlockName:
                self.assertEqual(dbsBlock['events'], 40)
                self.assertEqual(dbsBlock['nFiles'], 2)
                self.assertEqual(dbsBlock['size'], 2048)

        return