Exemple #1
0
    def testHeartbeat(self):
        testComponent = HeartbeatAPI("testComponent")
        testComponent.registerComponent()
        self.assertEqual(testComponent.getHeartbeatInfo(), [])

        testComponent.updateWorkerHeartbeat("testWorker")
        result = testComponent.getHeartbeatInfo()
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['worker_name'], "testWorker")
        time.sleep(1)

        testComponent.updateWorkerHeartbeat("testWorker2")
        result = testComponent.getHeartbeatInfo()
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['worker_name'], "testWorker2")

        time.sleep(1)
        testComponent.updateWorkerHeartbeat("testWorker")
        result = testComponent.getHeartbeatInfo()
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['worker_name'], "testWorker")


        testComponent = HeartbeatAPI("test2Component")
        testComponent.registerComponent()
        time.sleep(1)
        testComponent.updateWorkerHeartbeat("test2Worker")

        result = testComponent.getHeartbeatInfo()
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]['worker_name'], "testWorker")
        self.assertEqual(result[1]['worker_name'], "test2Worker")

        time.sleep(1)
        testComponent.updateWorkerHeartbeat("test2Worker2")
        result = testComponent.getHeartbeatInfo()
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]['worker_name'], "testWorker")
        self.assertEqual(result[1]['worker_name'], "test2Worker2")

        time.sleep(1)
        testComponent.updateWorkerHeartbeat("test2Worker")
        result = testComponent.getHeartbeatInfo()
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]['worker_name'], "testWorker")
        self.assertEqual(result[1]['worker_name'], "test2Worker")

        testComponent.updateWorkerError("test2Worker", "Error1")
        result = testComponent.getHeartbeatInfo()
        self.assertEqual(result[1]['error_message'], "Error1")
Exemple #2
0
    def setUp(self):
        """
        _setUp_

        Standard setup: Now with 100% more couch
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=[
            "WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl",
            "WMCore.Agent.Database"
        ])
        self.testInit.setupCouch("jobsubmitter_t/jobs", "JobDump")
        self.testInit.setupCouch("jobsubmitter_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir",
                                       logger=myThread.logger,
                                       dbinterface=myThread.dbi)

        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Exemple #3
0
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.  Try to create all of the
        WMBS tables.  Also, create some dummy locations.
        """
        super(JobCreatorTest, self).setUp()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=[
            'WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database'
        ],
                                useDefault=False)
        self.couchdbname = "jobcreator_t"
        self.testInit.setupCouch("%s/jobs" % self.couchdbname, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.couchdbname, "FWJRDump")
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        locationAction = self.daoFactory(classname="Locations.New")
        for site in self.sites:
            locationAction.execute(siteName=site, pnn=site)

        # Create sites in resourceControl

        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site, pnn=site, ceName=site)
            resourceControl.insertThreshold(siteName=site,
                                            taskType='Processing',
                                            maxSlots=10000,
                                            pendingSlots=10000)

        self.resourceControl = resourceControl

        self._setup = True
        self._teardown = False

        self.testDir = self.testInit.generateWorkDir()
        self.cwd = os.getcwd()

        # Set heartbeat
        self.componentName = 'JobCreator'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        if PY3:
            self.assertItemsEqual = self.assertCountEqual

        return
Exemple #4
0
    def testUpdateWorkers(self):
        """
        _testUpdateWorkers_

        Create a couple of components and workers and test the update methods
        """
        comp1 = HeartbeatAPI("testComponent1",
                             pollInterval=60,
                             heartbeatTimeout=600)
        comp1.registerComponent()
        comp1.registerWorker("testWorker1")
        comp1.registerWorker("testWorker2")

        comp2 = HeartbeatAPI("testComponent2",
                             pollInterval=30,
                             heartbeatTimeout=300)
        comp2.registerComponent()
        comp2.registerWorker("testWorker21")

        comp1.updateWorkerCycle("testWorker1", 1.001, None)
        comp2.updateWorkerCycle("testWorker21", 1234.1, 100)
        hb1 = comp1.getHeartbeatInfo()
        hb2 = comp2.getHeartbeatInfo()

        for worker in hb1:
            if worker['worker_name'] == 'testWorker1':
                self.assertTrue(worker["cycle_time"] > 1.0)
            else:
                self.assertEqual(worker["cycle_time"], 0)
        self.assertItemsEqual([item["outcome"] for item in hb1], [None, None])
        self.assertItemsEqual([item["error_message"] for item in hb1],
                              [None, None])

        self.assertEqual(round(hb2[0]["cycle_time"], 1), 1234.1)
        self.assertEqual(hb2[0]["outcome"], '100')
        self.assertEqual(hb2[0]["error_message"], None)

        # time to update workers with an error
        comp1.updateWorkerError("testWorker2", "BAD JOB!!!")
        hb1 = comp1.getHeartbeatInfo()
        for worker in hb1:
            if worker['worker_name'] == 'testWorker2':
                self.assertTrue(worker["last_error"] > int(time.time() - 10))
                self.assertEqual(worker["state"], "Error")
                self.assertEqual(worker["error_message"], "BAD JOB!!!")
Exemple #5
0
    def setUp(self):
        """
        _setUp_

        setUp function for unittest
        """
        # Set constants
        self.couchDB = "config_test"
        self.configURL = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'],
            useDefault=False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database",
            logger=myThread.logger,
            dbinterface=myThread.dbi)
        self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                         logger=myThread.logger,
                                         dbinterface=myThread.dbi)

        locationAction = self.bufferFactory(
            classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="se1.cern.ch")
        locationAction.execute(siteName="se1.fnal.gov")
        locationAction.execute(siteName="malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName=self.couchDB)
        configCache.createUserGroup(groupname="testGroup", username='******')
        self.testDir = self.testInit.generateWorkDir()

        psetPath = os.path.join(self.testDir, "PSet.txt")
        f = open(psetPath, 'w')
        f.write(self.configString)
        f.close()

        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB,
                                         configCache.getCouchID())
        return
Exemple #6
0
    def setUp(self):
        """
        _setUp_

        Set up vital components
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS",'WMCore.MsgService',
                                                 'WMCore.ResourceControl', 'WMCore.ThreadPool',
                                                 'WMCore.Agent.Database'],
                                useDefault = False)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)



        locationAction = self.daoFactory(classname = "Locations.New")
        pendingSlots  = self.daoFactory(classname = "Locations.SetPendingSlots")


        for site in self.sites:
            locationAction.execute(siteName = site, seName = 'se.%s' % (site), ceName = site)
            pendingSlots.execute(siteName = site, pendingSlots = 1000)


        #Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName = site, seName = 'se.%s' % (site), ceName = site)
            resourceControl.insertThreshold(siteName = site, taskType = 'Processing', \
                                            maxSlots = 10000, pendingSlots = 10000)


        self.testDir = self.testInit.generateWorkDir()


        # Set heartbeat
        for component in self.components:
            heartbeatAPI = HeartbeatAPI(component)
            heartbeatAPI.registerComponent()




        return
Exemple #7
0
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.  Try to create all of the
        Heartbeat tables.  Also add some dummy locations.
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()  # logLevel = logging.SQLDEBUG
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.Agent.Database"],
                                useDefault=False)
        self.heartbeat = HeartbeatAPI("testComponent")
Exemple #8
0
    def prepareToStart(self):
        """
        _prepareToStart_

        returns: Nothing

        Starts the initialization procedure. It is mainly an aggregation method
        so it can easily used in tests.
        """
        self.state = 'initialize'
        self.initInThread()
        # note: every component gets a (unique) name:
        # self.config.Agent.componentName
        logging.info(">>>Registering Component - %s",
                     self.config.Agent.componentName)

        if getattr(self.config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(self.config.Agent.componentName)
            self.heartbeatAPI.registerComponent()

        logging.info('>>>Starting initialization')

        logging.info('>>>Setting default transaction')
        myThread = threading.currentThread()

        self.preInitialization()

        if myThread.sql_transaction:
            myThread.transaction.begin()

        self.initialization()
        self.postInitialization()

        if myThread.sql_transaction:
            myThread.transaction.commit()

        logging.info('>>>Committing default transaction')

        logging.info(">>>Starting worker threads")
        myThread.workerThreadManager.resumeWorkers()

        logging.info(">>>Initialization finished!\n")
        # wait for messages
        self.state = 'active'
    def prepareWorker(self, worker, idleTime):
        """
        Prepares a worker thread before running
        """
        # Work timing
        worker.idleTime = idleTime
        worker.component = self.component
        self.lock.acquire()
        self.slavecounter += 1
        worker.slaveid = "%s-%s" % (self.wtmnumber, self.slavecounter)
        self.lock.release()


        # Thread synchronisation
        worker.notifyTerminate = self.terminateSlaves
        worker.terminateCallback = self.slaveTerminateCallback
        worker.notifyPause = self.pauseSlaves
        worker.notifyResume = self.resumeSlaves
        if hasattr(self.component.config, "Agent"):
            if getattr(self.component.config.Agent, "useHeartbeat", True):
                worker.heartbeatAPI = HeartbeatAPI(self.component.config.Agent.componentName)
Exemple #10
0
    def setUp(self):
        """
        setup for test.
        """

        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.tearDown()
        self.testInit.setSchema(customModules=[
            "WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl",
            "WMCore.Agent.Database"
        ],
                                useDefault=False)
        self.testInit.setupCouch("bossair_t/jobs", "JobDump")
        self.testInit.setupCouch("bossair_t/fwjrs", "FWJRDump")

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        #Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site,
                                       pnn='se.%s' % (site),
                                       cmsName=site,
                                       ceName=site,
                                       plugin="CondorPlugin",
                                       pendingSlots=1000,
                                       runningSlots=2000)
            resourceControl.insertThreshold(siteName = site, taskType = 'Processing', \
                                            maxSlots = 1000, pendingSlots = 1000)
        resourceControl.insertSite(siteName='Xanadu',
                                   pnn='se.Xanadu',
                                   cmsName=site,
                                   ceName='Xanadu',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName = 'Xanadu', taskType = 'Processing', \
                                        maxSlots = 10000, pendingSlots = 10000)

        resourceControl.insertSite(siteName='jade-cms.hip.fi',
                                   pnn='madhatter.csc.fi',
                                   cmsName=site,
                                   ceName='jade-cms.hip.fi',
                                   plugin="ARCPlugin")
        resourceControl.insertThreshold(siteName = 'jade-cms.hip.fi', taskType = 'Processing', \
                                        maxSlots = 100, pendingSlots = 100)
        # using this for glite submissions
        resourceControl.insertSite(siteName='grid-ce-01.ba.infn.it',
                                   pnn='storm-se-01.ba.infn.it',
                                   cmsName=site,
                                   ceName='grid-ce-01.ba.infn.it',
                                   plugin='gLitePlugin')
        resourceControl.insertThreshold(siteName = 'grid-ce-01.ba.infn.it', taskType = 'Processing', \
                                        maxSlots = 50, pendingSlots = 50)

        # Create user
        newuser = self.daoFactory(classname="Users.New")
        newuser.execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        # Change this to the working dir to keep track of error and log files from condor
        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        componentName = 'test'
        self.heartbeatAPI = HeartbeatAPI(componentName)
        self.heartbeatAPI.registerComponent()
        componentName = 'JobTracker'
        self.heartbeatAPI2 = HeartbeatAPI(componentName)
        self.heartbeatAPI2.registerComponent()

        return
Exemple #11
0
    def __init__(self, slaveClassName, totalSlaves, componentDir,
                 config, namespace = 'WMComponent', inPort = '5555',
                 outPort = '5558'):
        """
        __init__

        Constructor for the process pool.  The slave class name must be based
        inside the WMComponent namespace.  For examples, the JobAccountant would
        pass in 'JobAccountant.AccountantWorker' to run the AccountantWorker
        class.  All log files will be stored in the component directory that is
        passed in.  Each slave will have its own log file.

        Note that the config is only used to determine database connection
        parameters.  It is not passed to the slave class.  The slaveInit
        parameter will be serialized and passed to the slave class's
        constructor.
        """
        self.enqueueIndex = 0
        self.dequeueIndex = 0
        self.runningWork  = 0

        #Use the Services.Requests JSONizer, which handles __to_json__ calls
        self.jsonHandler = JSONRequests()

        # heartbeat should be registered at this point
        if getattr(config.Agent, "useHeartbeat", True):
            self.heartbeatAPI = HeartbeatAPI(getattr(config.Agent, "componentName", "ProcPoolSlave"))

        self.slaveClassName = slaveClassName
        self.componentDir   = componentDir
        self.config         = config
        # Grab the python version from the current version
        # Assume naming convention pythonA.B, i.e., python2.4 for v2.4.X
        majorVersion = sys.version_info[0]
        minorVersion = sys.version_info[1]

        if majorVersion and minorVersion:
            self.versionString = "python%i.%i" % (majorVersion, minorVersion)
        else:
            self.versionString = "python2.6"

        self.workers   = []
        self.nSlaves   = totalSlaves
        self.namespace = namespace
        self.inPort    = inPort
        self.outPort   = outPort


        # Pickle the config
        self.configPath = os.path.join(componentDir, '%s_config.pkl' % slaveClassName)
        if os.path.exists(self.configPath):
            # Then we note it and overwrite it
            msg =  "Something's in the way of the ProcessPool config: %s" % self.configPath
            logging.error(msg)
        f = open(self.configPath, 'w')
        cPickle.dump(config, f)
        f.close()

        # Set up ZMQ
        try:
            context = zmq.Context()
            self.sender = context.socket(zmq.PUSH)
            self.sender.bind("tcp://*:%s" % inPort)
            self.sink = context.socket(zmq.PULL)
            self.sink.bind("tcp://*:%s" % outPort)
        except zmq.ZMQError:
            # Try this again in a moment to see
            # if it's just being held by something pre-existing
            import time
            time.sleep(1)
            logging.error("Blocked socket on startup: Attempting sleep to give it time to clear.")
            try:
                context = zmq.Context()
                self.sender = context.socket(zmq.PUSH)
                self.sender.bind("tcp://*:%s" % inPort)
                self.sink = context.socket(zmq.PULL)
                self.sink.bind("tcp://*:%s" % outPort)
            except Exception as ex:
                msg =  "Error attempting to open TCP sockets\n"
                msg += str(ex)
                logging.error(msg)
                import traceback
                print traceback.format_exc()
                raise ProcessPoolException(msg)

        # Now actually create the slaves
        self.createSlaves()


        return
Exemple #12
0
    def testAddComponent(self):
        """
        _testAddComponent_

        Test creation of components and worker threads as well as the
        get heartbeat DAOs
        """
        comp1 = HeartbeatAPI("testComponent1",
                             pollInterval=60,
                             heartbeatTimeout=600)
        comp1.registerComponent()
        self.assertEqual(comp1.getHeartbeatInfo(), [])  # no worker thread yet

        comp1.registerWorker("testWorker1")
        self.assertEqual(len(comp1.getHeartbeatInfo()), 1)

        comp1.registerWorker("testWorker2")
        self.assertEqual(len(comp1.getHeartbeatInfo()), 2)

        comp2 = HeartbeatAPI("testComponent2",
                             pollInterval=30,
                             heartbeatTimeout=300)
        comp2.registerComponent()
        self.assertEqual(comp2.getHeartbeatInfo(), [])  # no worker thread yet
        self.assertEqual(len(comp2.getAllHeartbeatInfo()), 2)

        comp2.registerWorker("testWorker21")
        self.assertEqual(len(comp2.getHeartbeatInfo()), 1)
        self.assertEqual(len(comp2.getAllHeartbeatInfo()), 3)

        comp1.updateWorkerHeartbeat("testWorker1", "Running")
        comp1.updateWorkerHeartbeat("testWorker2", "Running")
        comp2.updateWorkerHeartbeat("testWorker21", "Running")
        self.assertEqual(len(comp1.getAllHeartbeatInfo()), 3)
        self.assertEqual(len(comp2.getAllHeartbeatInfo()), 3)

        comp1Res = comp1.getHeartbeatInfo()
        comp2Res = comp2.getHeartbeatInfo()
        self.assertEqual(len(comp1Res), 2)
        self.assertEqual(len(comp2Res), 1)

        self.assertItemsEqual([item["name"] for item in comp1Res],
                              ["testComponent1", "testComponent1"])
        self.assertItemsEqual([item["worker_name"] for item in comp1Res],
                              ["testWorker1", "testWorker2"])
        self.assertItemsEqual([item["state"] for item in comp1Res],
                              ["Running", "Running"])
        self.assertItemsEqual([item["poll_interval"] for item in comp1Res],
                              [60, 60])
        self.assertItemsEqual([item["update_threshold"] for item in comp1Res],
                              [600, 600])

        self.assertItemsEqual([item["name"] for item in comp2Res],
                              ["testComponent2"])
        self.assertItemsEqual([item["worker_name"] for item in comp2Res],
                              ["testWorker21"])
        self.assertItemsEqual([item["state"] for item in comp2Res],
                              ["Running"])
        self.assertItemsEqual([item["poll_interval"] for item in comp2Res],
                              [30])
        self.assertItemsEqual([item["update_threshold"] for item in comp2Res],
                              [300])