def testPublishUnique(self):
    """
    Test the publishUnique function
    """
    ms1 = MessageService()
    ms1.registerAs("uniqueComponent1")
    ms2 = MessageService()
    ms2.registerAs("uniqueComponent2")
    ms2.subscribeTo("uniqueMessage")

    ms1.publishUnique("uniqueMessage", "1")
    ms1.publishUnique("uniqueMessage", "2")
    ms1.commit()

    # first message sent should be the only one retrieved
    type, payload = ms2.get(wait = False)
    self.assertEqual(type, "uniqueMessage")
    self.assertEqual(payload, "1")
    type, payload = ms2.get(wait = False)
    self.assertEqual(type, None)
    self.assertEqual(payload, None)
    ms2.commit()

    ms1.purgeMessages()
    ms2.purgeMessages()
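# Hedged note, not part of the original test: as exercised above, publishUnique
# appears to deliver only the first pending message of a given type to a
# subscriber, so the pattern (names reused from the test) reduces to:
#
#   ms1.publishUnique("uniqueMessage", "1")   # delivered
#   ms1.publishUnique("uniqueMessage", "2")   # dropped while "1" is still pending
#   ms1.commit()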
def removePendingEvents(self, componentName):
    """
    _removePendingEvents_

    Remove all pending events from the message service for the
    component specified.

    Returns the number of events removed.
    """
    try:
        ms = MessageService()
        # remove messages for the component by subscribing to them as
        # that component and pulling them all out
        ms.registerAs(componentName)
        count = 0
        while True:
            type, payload = ms.get(wait = False)
            if type == None:
                break
            else:
                count += 1
        ms.commit()
        return count
    except StandardError, ex:
        msg = "Error while removing pending messages for:\n"
        msg += "Component Name %s\n" % componentName
        msg += str(ex)
        return Fault(1, msg)
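# Hedged usage sketch, not part of the original module: a caller holding an
# object that exposes removePendingEvents (here called "cleaner", an
# illustrative name) might drain a component's queue like this.
from xmlrpclib import Fault

def drainComponentQueue(cleaner, componentName):
    """Drain pending events for componentName and report how many were removed."""
    result = cleaner.removePendingEvents(componentName)
    if isinstance(result, Fault):
        # removePendingEvents returns a Fault on error rather than raising
        print "Cleanup failed: %s" % result.faultString
        return 0
    print "Removed %s pending events for %s" % (result, componentName)
    return result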
class ComponentServerTest(unittest.TestCase):
    """
    TestCase implementation for ServerTest
    """
    def setUp(self):
        print "******Start ComponentServerTest (ErrorHandler) ***********"
        print "\nThis test depends on data generated by the JobState_t.py"
        print " and FwkJobReport_t.py tests and should NOT be run"
        print " separately, but only in a test suite."
        print " Make sure ONLY the ErrorHandler component is running!"
        print " "
        # we use this for event publication.
        self.ms = MessageService()
        self.ms.registerAs("TestComponent")
        # subscribe to the events this test produces
        # so we can verify them in the database
        self.ms.subscribeTo("CreateJob")
        self.ms.subscribeTo("GeneralJobFailure")
        self.ms.subscribeTo("SubmitJob")
        self.outputPath = os.getenv('PRODAGENT_WORKDIR')

    def testA(self):
        print("""\nPublish events to turn ErrorHandler logging on""")
        try:
            self.ms.publish("ErrorHandler:StartDebug", "none")
            self.ms.publish("JobCleanup:StartDebug", "none")
            self.ms.commit()
        except StandardError, ex:
            msg = "Failed testA\n"
            msg += str(ex)
            self.fail(msg)
def sendMessage(self, event, payload):
    """
    sendMessage

    Publishes a ProdAgent message in the ProdAgent DB.
    """
    ms = MessageService()
    ms.registerAs("Test")
    if payload != None:
        ms.publish(event, payload)
    else:
        ms.publish(event, "")
    ms.commit()
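# Hedged usage sketch (names invented for illustration): since sendMessage is
# a thin wrapper around MessageService, a driver object exposing it could
# switch several components into debug mode with a loop like this.
def enableDebug(sender, components=("ErrorHandler", "JobCleanup")):
    for component in components:
        # a None payload is published as an empty string by sendMessage
        sender.sendMessage("%s:StartDebug" % component, None)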
class ProxyLife:

    def __init__(self, dBlite, path, dictSE, additionalParams={}, minim=3600*36):
        self.proxiespath = path
        if minim < 3600*6:
            minim = 3600*6
        self.minimumleft = minim
        self.bossCfgDB = dBlite
        self.dictSE = dictSE
        # stuff needed for glExec renewal technicalities
        self.useGlExecDelegation = \
            additionalParams.get("glExecDelegation", 'false') == 'true'
        # register with the message service
        self.ms = MessageService()
        self.ms.registerAs("TaskLifeManager")
        # preserve proxies already notified as expiring
        self.__allproxies = []
        ## preserve proxies already notified for cleaning
        self.__cleanproxies = []
        ## clean script
        #logging.info("Cleaning old script...")
        #self.delOldScript()

    ###############################################
    ######       SYSTEM INTERACTIONS         ######
    def executeCommand(self, command):
        import commands
        status, outp = commands.getstatusoutput(command)
        return outp

    def cleanFiles(self, files):
        self.executeCommand("rm -f %s " % str(files))
        logging.debug("Executed command: %s " % str("rm -f " + str(files)))

    def delOldScript(self):
        workdir = os.getenv("PRODAGENT_WORKDIR")
        dirwk = os.path.join(workdir, "TaskLifeManager")
        try:
            files = os.listdir(dirwk)
            for filet in files:
                if filet == '.' or filet == '..' or \
                   not (filet.startswith("deleteSB_") and filet.endswith("_.py")):
                    continue
                self.cleanFiles(os.path.join(dirwk, filet))
        except Exception, ex:
            logging.info("Problem cleaning old script: %s" % str(ex))
def publishRssItem(self, payload):
    """
    Arguments:

      payload -- the message

    Return:

      none
    """
    ms = MessageService()
    ms.registerAs("RssFeeder")
    ms.publish("RssFeeder:AddFile", payload)
    ms.commit()
    ms.close()
def doSubmit(self):
    """
    _doSubmit_

    Perform bulk or single submission as needed based on the class data
    populated by the component that is invoking this plugin
    """
    # create message service
    ms = MessageService()
    # register
    ms.registerAs("JobEmulatorBulkSubmitter")

    for jobSpec, cacheDir in self.toSubmit.items():
        logging.debug("SpecFile = %s" % self.specFiles[jobSpec])
        ms.publish("EmulateJob", self.specFiles[jobSpec])
        ms.commit()
        logging.debug("EmulateJob message sent")

    return
class TriggerUnitTests(unittest.TestCase):
    """
    TestCase for TriggerAPI module
    """

    _triggerSet = False

    def setUp(self):
        if not TriggerUnitTests._triggerSet:
            Session.set_database(dbConfig)
            Session.connect()
            Session.start_transaction()
            print "\n**************Start TriggerUnitTests**********"
            self.ms = MessageService()
            self.ms.registerAs("TriggerTest")
            self.trigger = TriggerAPI(self.ms)
            self.triggers = 5
            self.jobspecs = 5
            self.flags = 5
            TriggerUnitTests._triggerSet = True
            Session.commit_all()
            Session.close_all()

    def testA(self):
        Session.set_database(dbConfig)
        Session.connect()
        Session.start_transaction()
        try:
            print("\nCreate job spec ids")
            for j in xrange(0, self.jobspecs):
                JobStateChangeAPI.register("jobSpec" + str(j), "Processing", 3, 1)
        except StandardError, ex:
            msg = "Failed TestA:\n"
            msg += str(ex)
            self.fail(msg)
        Session.commit_all()
        Session.close_all()
class ComponentServerTest(unittest.TestCase):
    """
    TestCase implementation for ServerTest
    """
    def setUp(self):
        # we use this for event publication.
        self.ms = MessageService()
        self.ms.registerAs("TestComponent")
        self.ms.subscribeTo("SubmitJob")
        self.failedJobs = 1000
        self.successJobs = 1000
        self.maxRetries = 2
        self.outputPath = os.getenv('PRODAGENT_WORKDIR')

    def testA(self):
        try:
            print("--->sending debug events")
            self.ms.publish("ErrorHandler:StartDebug", "none")
            self.ms.publish("JobCleanup:StartDebug", "none")
            self.ms.commit()
        except StandardError, ex:
            msg = "Failed testA\n"
            msg += str(ex)
            self.fail(msg)
class AdminControlInterface:
    """
    _AdminControlInterface_

    AdminControl XMLRPC interface object.

    Expose accessor methods to retrieve information from the MessageService
    and JobState DB tables, and allow users to publish events.

    Every method should return either simple type structures (values, lists,
    dicts etc) or an xmlrpclib.Fault if there is an error.
    """
    def __init__(self):
        self.ms = MessageService()
        self.ms.registerAs("AdminControlInterface")
        self.status = MessageServiceStatus()

    def publishEvent(self, eventName, payload = ""):
        """
        _publishEvent_

        Publish an event into the ProdAgent MessageService
        """
        try:
            self.ms.publish(eventName, payload)
            self.ms.commit()
            return 0
        except StandardError, ex:
            msg = "Error publishing Event: %s\n" % eventName
            msg += "Payload: %s\n" % payload
            msg += "From AdminControl\n"
            msg += "Exception: %s" % str(ex)
            result = Fault(1, msg)
            return result
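# Hedged client-side sketch, not from the original source: AdminControlInterface
# is served over XMLRPC, so a remote caller could publish an event roughly as
# below.  The host, port and the way faults surface on the client side are
# assumptions for illustration only.
import xmlrpclib

def publishViaAdminControl(url, eventName, payload=""):
    server = xmlrpclib.ServerProxy(url)
    try:
        result = server.publishEvent(eventName, payload)
        if result == 0:
            print "Published %s" % eventName
        return result
    except xmlrpclib.Fault, fault:
        print "Publish failed: %s" % fault.faultString
        return fault

# e.g. publishViaAdminControl("http://localhost:8081", "ErrorHandler:StartDebug")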
def recreateJob(jobspecFile, jobQueue):
    """
    Re-create the processing job.
    """
    # remove entries from tr_Trigger/Action tables to be on the safer side
    clean_tr_tables(jobspecFile)

    # create job if not merge
    spec = JobSpec()
    spec.load(jobspecFile)

    #  //
    # // clean spec id from the job queue
    #//  No easy way to do this in JobQueueAPI so use naked SQL for now
    Session.set_database(dbConfig)
    Session.connect()
    sqlStr1 = "DELETE FROM jq_queue WHERE job_spec_id=\"%s\"; " % \
        spec.parameters['JobName']
    Session.execute(sqlStr1)
    Session.commit_all()

    if spec.parameters['JobType'] in ('Processing', 'CleanUp', 'LogCollect', 'Harvesting'):
        # publish CreateJob
        print "- Resubmit Processing job"
        print "--> Publishing CreateJob for %s" % jobspecFile
        ms = MessageService()
        ms.registerAs("Test")
        if jobQueue:
            ms.publish("QueueJob", jobspecFile)
        else:
            ms.publish("CreateJob", jobspecFile)
        ms.commit()
    elif spec.parameters['JobType'] == "Merge":
        try:
            jobname = spec.parameters['JobName']
        except Exception, ex:
            msg = "Problem extracting jobspec name from JobSpec File: %s Details: %s" % \
                (jobspecFile, str(ex))
            print msg
            return
        print "- Resubmit Merge job"
        print "--> Publishing GeneralJobFailure for %s" % jobname
        ms = MessageService()
        ms.registerAs("TestMA")
        ms.publish("GeneralJobFailure", jobname)
        ms.commit()
        time.sleep(1)
        print "--> Publishing MergeSensor:ReSubmit for %s" % jobname
        ms = MessageService()
        ms.registerAs("Test")
        ms.publish("MergeSensor:ReSubmit", jobname)
        ms.commit()
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError, ex:
    print usage
    print str(ex)
    sys.exit(1)

workflow = None

for opt, arg in opts:
    if opt == "--workflow":
        workflow = arg

if workflow == None:
    print "--workflow option not provided"
    print usage
    sys.exit(1)

if not os.path.exists(workflow):
    print "workflow not found: %s" % workflow
    sys.exit(1)

## use MessageService
ms = MessageService()
## register message service instance as "Test"
ms.registerAs("Test")

ms.publish("DBSInterface:StartDebug", '')
ms.commit()
ms.publish("NewDataset", workflow)
ms.commit()
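# Hedged usage note: the fragment above is the body of a small command-line
# injector, so (assuming it lives in a script such as injectNewDataset.py, a
# name used here only for illustration) it would be invoked as
#
#   python injectNewDataset.py --workflow=/path/to/Workflow-Spec.xml
#
# turning on DBSInterface debug logging and then publishing NewDataset with
# the workflow file as payload.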
class ComponentServerTest(unittest.TestCase):
    """
    TestCase implementation for ServerTest
    """

    _triggerSet = False

    def setUp(self):
        Session.set_database(dbConfig)
        Session.connect()
        Session.start_transaction()
        if not ComponentServerTest._triggerSet:
            print "\n****Start ComponentServerTest (JobCleanup)*******"
            # we use this for event publication.
            self.ms = MessageService()
            self.ms.registerAs("JobCleanupTest")
            self.jobSpecs = 1000
            self.location = '/tmp/prodagent/components/JobCleanup/cacheDirs'
            self.failureJobSpecs = 1000
            self.flags = 5
            self.trigger = TriggerAPI(self.ms)
            # create some directories in tmp
            print('\nCreating directories in the /tmp area to serve ' + \
                  'as job cache dirs')
            for i in xrange(0, self.jobSpecs):
                try:
                    os.makedirs(self.location + '/jobSpecDir_' + str(i))
                    # create some files (some of which should not be deleted
                    # by a partial cleanup)
                    file1 = open(self.location + '/jobSpecDir_' + str(i) + '/JobSpec.xml', 'w')
                    file1.close()
                    file2 = open(self.location + '/jobSpecDir_' + str(i) + '/FrameworkJobReport.xml', 'w')
                    file2.close()
                    file3 = open(self.location + '/jobSpecDir_' + str(i) + '/JobTarFile.tar.gz', 'w')
                    file3.close()
                    file4 = open(self.location + '/jobSpecDir_' + str(i) + '/Pretend2BeADir1.txt', 'w')
                    file4.close()
                    file5 = open(self.location + '/jobSpecDir_' + str(i) + '/Pretend2BeADir2.txt', 'w')
                    file5.close()
                except:
                    raise
            # create job caches that need to be tarred and then removed:
            for i in xrange(0, self.failureJobSpecs):
                try:
                    os.makedirs(self.location + '/failureJobSpecDir_' + str(i))
                    file1 = open(self.location + '/failureJobSpecDir_' + str(i) + '/JobSpec.xml', 'w')
                    file1.close()
                    file2 = open(self.location + '/failureJobSpecDir_' + str(i) + '/FrameworkJobReport.xml', 'w')
                    file2.close()
                    file3 = open(self.location + '/failureJobSpecDir_' + str(i) + '/JobTarFile.tar.gz', 'w')
                    file3.close()
                    file4 = open(self.location + '/failureJobSpecDir_' + str(i) + '/aFile.txt', 'w')
                    file4.close()
                    os.makedirs(self.location + '/failureJobSpecDir_' + str(i) + '/aDir1')
                    file5 = open(self.location + '/failureJobSpecDir_' + str(i) + '/aDir1/File.txt', 'w')
                    file5.close()
                    os.makedirs(self.location + '/failureJobSpecDir_' + str(i) + '/aDir2')
                    file6 = open(self.location + '/failureJobSpecDir_' + str(i) + '/aDir2/aFile.txt', 'w')
                    file6.close()
                    os.makedirs(self.location + '/failureJobSpecDir_' + str(i) + '/aDir3')
                    file7 = open(self.location + '/failureJobSpecDir_' + str(i) + '/aDir3/aFile.txt', 'w')
                    file7.close()
                except:
                    raise
            ComponentServerTest._triggerSet = True
        Session.commit_all()
        Session.close_all()

    def testA(self):
        print("""\npublish events to turn JobCleanup logging on""")
        try:
            Session.set_database(dbConfig)
            Session.connect()
            Session.start_transaction()
            self.ms.publish("JobCleanup:StartDebug", "none")
            self.ms.commit()
            Session.commit_all()
            Session.close_all()
        except StandardError, ex:
            msg = "Failed testA:\n"
            msg += str(ex)
            self.fail(msg)
class JabberThread(Thread):

    def __init__(self, summoner, log, throughput):
        """
        @ summoner: class that invoked the Jabber and that maintains the
                    go_on_accepting_load attribute
        @ log: logging system
        @ throughput: average number of seconds per task that must be granted
        @ ms: MessageService instance used to listen for completed tasks
        """
        Thread.__init__(self)
        self.summoner = summoner
        self.logsys = log
        self.thr = throughput
        self.go_on_accepting_load = 1
        self.start()
        pass

    def run(self):
        """
        JabberThread main loop: get task completion messages, sample the time
        and evaluate the throughput. If the time per task exceeds the
        requirement by too much (+10%, to avoid fluctuations), stop accepting
        new tasks.
        """
        self.logsys.info("Starting JabberThread")
        self.ms = MessageService()
        self.ms.registerAs("CRAB_CmdMgr_jabber")
        # messages implying meaningful network load
        self.ms.subscribeTo("CRAB_Cmd_Mgr:NewTask")
        self.ms.subscribeTo("CRAB_Cmd_Mgr:NewCommand")

        import time
        tPre = time.time()

        if int(self.thr) == 0:
            self.go_on_accepting_load = 2
            self.logsys.info("Stopping accepting load")

        count = 0
        while True:
            # get messages
            type, payload = None, None
            try:
                type, payload = self.ms.get()
                self.ms.commit()
            except Exception, exc:
                self.logsys.error("ERROR: problem interacting with the message service")
                self.logsys.error(str(exc))
                time.sleep(2)
                continue
            self.logsys.debug("JabberThread: %s %s" % (type, payload))

            tPost = time.time()
            deltaT = tPost - tPre

            if count % 2000 == 0:
                self.logsys.info("AvgThroughput: %f s (%d connections / day)" \
                                 % (deltaT, int(0.5 + 86400 / (deltaT + 1))))
                count = 0
            count += 1

            # jabber disabled
            if self.thr < 0.0:
                continue

            # alter the guard on the proxy service
            if int(self.thr) != 0:
                if deltaT > 1.1 * self.thr:
                    self.go_on_accepting_load = 0  # False
                    self.logsys.info("Stopping accepting load")
                else:
                    self.go_on_accepting_load = 1  # True
            pass
class RelValInjectorComponent:
    """
    _RelValInjectorComponent_

    Component to inject, trace and manage RelVal jobs
    """
    def __init__(self, **args):
        self.args = {}
        self.args['Logfile'] = None
        self.args['FastJob'] = 250
        self.args['MediumJob'] = 100
        self.args['SlowJob'] = 50
        self.args['VerySlowJob'] = 25
        self.args['SitesList'] = None
        self.args['PollInterval'] = "00:10:00"
        self.args['MigrateToGlobal'] = False
        self.args['InjectToPhEDEx'] = False
        self.args.update(args)

        if self.args['Logfile'] == None:
            self.args['Logfile'] = os.path.join(self.args['ComponentDir'],
                                                "ComponentLog")

        #  //
        # // Job class number of events should be ints
        #//
        self.args['FastJob'] = int(self.args['FastJob'])
        self.args['MediumJob'] = int(self.args['MediumJob'])
        self.args['SlowJob'] = int(self.args['SlowJob'])
        self.args['VerySlowJob'] = int(self.args['VerySlowJob'])
        self.args['Fast'] = self.args['FastJob']
        self.args['Medium'] = self.args['MediumJob']
        self.args['Slow'] = self.args['SlowJob']
        self.args['VerySlow'] = self.args['VerySlowJob']

        #  //
        # // List of sites to get RelVal jobs
        #//
        self.sites = []
        for sitename in self.args['SitesList'].split(','):
            if len(sitename.strip()) > 0:
                self.sites.append(sitename.strip())

        #  //
        # // manage migration and injection
        #//
        if str(self.args['MigrateToGlobal']).lower() in ("true", "yes"):
            self.args['MigrateToGlobal'] = True
        else:
            self.args['MigrateToGlobal'] = False
        if str(self.args['InjectToPhEDEx']).lower() in ("true", "yes"):
            self.args['InjectToPhEDEx'] = True
        else:
            self.args['InjectToPhEDEx'] = False
        if self.args['MigrateToGlobal'] == False:
            # Cannot inject without migration
            self.args['InjectToPhEDEx'] = False

        LoggingUtils.installLogHandler(self)
        msg = "RelValInjector Component Started:\n"
        msg += " Migrate to Global DBS: %s\n" % self.args['MigrateToGlobal']
        msg += " Inject to PhEDEx: %s\n" % self.args['InjectToPhEDEx']
        msg += "Jobs to be sent to Sites:\n"
        for site in self.sites:
            msg += " ==> %s\n" % site
        logging.info(msg)

    def __call__(self, message, payload):
        """
        _operator()_

        Respond to messages
        """
        logging.debug("Message Received: %s %s" % (message, payload))
        if message == "RelValInjector:StartDebug":
            logging.getLogger().setLevel(logging.DEBUG)
            return
        if message == "AdminControl:EndDebug":
            logging.getLogger().setLevel(logging.INFO)
            return
        if message == "RelValInjector:Inject":
            self.inject(payload)
            return
        if message == "RelValInjector:Poll":
            self.poll()
            return

    def poll(self):
        """
        _poll_

        Polling loop response to check status of RelVal jobs being tracked
        """
        logging.info("RelValInjector.poll()")
        #  //
        # // Poll WorkflowEntities to find all workflows owned by
        #//  this component
        relvalWorkflows = WEUtils.listWorkflowsByOwner("RelValInjector")
        workflows = WEWorkflow.get(relvalWorkflows)
        if type(workflows) != type(list()):
            workflows = [workflows]
        for workflow in workflows:
            if workflow != 0:
                logging.debug(
                    "Polling for state of workflow: %s\n" % str(workflow['id']))
                status = RelValStatus(self.args, self.ms, **workflow)
                status()
        self.ms.publish("RelValInjector:Poll", "", self.args['PollInterval'])
        self.ms.commit()
        return

    def startComponent(self):
        """
        _startComponent_

        Start the component and subscribe to messages
        """
        self.ms = MessageService()
        # register
        self.ms.registerAs("RelValInjector")
        # subscribe to messages
        self.ms.subscribeTo("RelValInjector:StartDebug")
        self.ms.subscribeTo("RelValInjector:EndDebug")
        self.ms.subscribeTo("RelValInjector:Inject")
        self.ms.subscribeTo("JobSuccess")
        self.ms.subscribeTo("GeneralJobFailure")
        self.ms.subscribeTo("RelValInjector:Poll")
self.ms.publish("RelValInjector:Poll", "", self.args['PollInterval']) self.ms.commit() while True: Session.set_database(dbConfig) Session.connect() Session.start_transaction() type, payload = self.ms.get() self.ms.commit() logging.debug("RelValInjector: %s, %s" % (type, payload)) self.__call__(type, payload) Session.commit_all() Session.close_all() def inject(self, relValSpecFile): """ _inject_ Given the relVal spec file provided, take that and generate workflows and jobs for all sites """ if not os.path.exists(relValSpecFile): msg = "Cannot load RelVal Spec File:\n %s\n" % relValSpecFile msg += "File does not exist..." logging.error(msg) return specMgr = RelValSpecMgr(relValSpecFile, self.sites, **self.args) specMgr.ms = self.ms try: tests = specMgr() except Exception, ex: msg = "Error invoking RelValSpecMgr for file\n" msg += "%s\n" % relValSpecFile msg += str(ex) logging.error(msg) return
## check the workflow exists on disk
workflow = os.path.expandvars(os.path.expanduser(workflow))
if not os.path.exists(workflow):
    print "Workflow not found: %s" % workflow
    sys.exit(1)

## get the workflow name
workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)
workflowName = workflowSpec.workflowName()
workflowBase = os.path.basename(workflow)

## use MessageService
ms = MessageService()
## register message service instance as "TestSkim"
ms.registerAs("TestSkim")

## Debug level
ms.publish("DatasetInjector:StartDebug", "none")
ms.publish("JobCreator:StartDebug", "none")
ms.publish("JobSubmitter:StartDebug", "none")
ms.publish("DBSInterface:StartDebug", "none")
ms.publish("ErrorHandler:StartDebug", "none")
ms.publish("TrackingComponent:StartDebug", "none")
ms.commit()

## Set Creator
ms.publish("JobCreator:SetCreator", "T0LSFCreator")
## Set Submitter
ms.publish("JobSubmitter:SetSubmitter", "T0LSFSubmitter")
## Set Workflow
class ProdMgrUnitTests(unittest.TestCase):
    """
    TestCase for the ProdMgr module; it tests the component that
    interacts with the ProdMgr
    """

    # keep track of some jobspecs
    __jobSpecId = []

    def setUp(self):
        # we use this for event publication.
        self.ms = MessageService()
        self.ms.registerAs("TestComponent")
        # subscribe to the events this test produces
        # so we can verify them in the database
        self.ms.subscribeTo("CreateJob")
        self.requests = 5
        self.prodMgrUrl = 'https://localhost:8443/clarens/'
        self.jobReportDir = '/tmp/prodAgent/ProdMgrInterface/jobReportDir'
        try:
            os.makedirs(self.jobReportDir)
        except:
            pass

    def testA(self):
        try:
            ###shell start####
            self.ms.publish("ProdMgrInterface:StartDebug", '')
            self.ms.commit()
            # this means we are using the size the allocation gives us
            self.ms.publish("ProdMgrInterface:JobSize", '-1')
            self.ms.commit()
            # this means that if we get a job from the prodmgr we cut it into
            # jobs with this number of events.
            self.ms.publish("ProdMgrInterface:JobCutSize", '12')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID0?Priority=3')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '4')
            self.ms.commit()
            print('Waiting for 4*9=36 CreateJobs')
            ###shell end ####
            for i in xrange(0, 36):
                type, payload = self.ms.get()
                print("Message type: " + str(type) + ", payload: " + str(payload))
                # retrieve the job spec id (jobname)
                jobspec = JobSpec()
                jobspec.load(payload)
                ProdMgrUnitTests.__jobSpecId.append(jobspec.parameters['JobName'])
                self.ms.commit()
            for jobspecid in ProdMgrUnitTests.__jobSpecId:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 9
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()

            ###shell start####
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '10')
            self.ms.commit()
            print('Waiting for 10*9=90 CreateJobs')
            ###shell end ####
            ProdMgrUnitTests.__jobSpecId = []
            for i in xrange(0, 90):
                type, payload = self.ms.get()
                print("Message type: " + str(type) + ", payload: " + str(payload))
                # retrieve the job spec id (jobname)
                jobspec = JobSpec()
                jobspec.load(payload)
                ProdMgrUnitTests.__jobSpecId.append(jobspec.parameters['JobName'])
                self.ms.commit()
            for jobspecid in ProdMgrUnitTests.__jobSpecId:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 9
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()
            sys.exit(0)

            ###shell start####
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID1?Priority=4')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '10')
            self.ms.commit()
            print('Waiting for 10 CreateJobs')
            ###shell end ####
            ProdMgrUnitTests.__jobSpecId = []
            for i in xrange(0, 10):
                type, payload = self.ms.get()
                print("Message type: " + str(type) + ", payload: " + str(payload))
                # retrieve the job spec id (jobname)
                jobspec = JobSpec()
                jobspec.load(payload)
                ProdMgrUnitTests.__jobSpecId.append(jobspec.parameters['JobName'])
                self.ms.commit()
            for jobspecid in ProdMgrUnitTests.__jobSpecId:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 3
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()
            print('ProdMgr is left with 8 allocations as 2 allocations successfully finished')

            ###shell start####
            self.ms.publish("ProdMgrInterface:RemoveIdlingAllocs", '00:00:01')
            print('All idling allocations should have been removed since they used a small time interval')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '20')
            self.ms.commit()
            print('ProdAgent should now have 20 active allocations (including the ones that were removed)')
            ###shell end ####
            ProdMgrUnitTests.__jobSpecId = []
            for i in xrange(0, 20):
                type, payload = self.ms.get()
                print("Message type: " + str(type) + ", payload: " + str(payload))
                # retrieve the job spec id (jobname)
                jobspec = JobSpec()
                jobspec.load(payload)
                ProdMgrUnitTests.__jobSpecId.append(jobspec.parameters['JobName'])
                self.ms.commit()
            #raw_input("Shut down the server to test queueing capability (check the log to see when no more messages enter)\n")
            for jobspecid in ProdMgrUnitTests.__jobSpecId[0:4]:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 3
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()
            #raw_input("Start server again\n")
            for jobspecid in ProdMgrUnitTests.__jobSpecId[4:]:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 3
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()

            ###shell start####
            print('Adding a non existing request')
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=NOTEXISTINGREQUEST?Priority=1')
            self.ms.commit()
            print('Adding a request that already finished')
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID0?Priority=2')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID2?Priority=5')
            print('There should now be 3 additional requests in the request queue')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:JobSize", '5')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '10')
            self.ms.commit()
            ###shell end ####
            ProdMgrUnitTests.__jobSpecId = []
            for i in xrange(0, 10):
                type, payload = self.ms.get()
                print("Message type: " + str(type) + ", payload: " + str(payload))
                # retrieve the job spec id (jobname)
                jobspec = JobSpec()
                jobspec.load(payload)
                ProdMgrUnitTests.__jobSpecId.append(jobspec.parameters['JobName'])
                self.ms.commit()
            for jobspecid in ProdMgrUnitTests.__jobSpecId[0:4]:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 2
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()
            for jobspecid in ProdMgrUnitTests.__jobSpecId[4:]:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 5
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("JobSuccess", reportLocation)
                self.ms.commit()

            ###shell start####
            print('Emitting ResourcesAvailable, which should get allocations from multiple requests')
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '15')
            self.ms.commit()
            print('There should now be 15 active allocations, and the finished request and the non-existing request are removed')
            ###shell end ####
            ProdMgrUnitTests.__jobSpecId = []
            for i in xrange(0, 15):
                type, payload = self.ms.get()
                print("Message type: " + str(type) + ", payload: " + str(payload))
                # retrieve the job spec id (jobname)
                jobspec = JobSpec()
                jobspec.load(payload)
                ProdMgrUnitTests.__jobSpecId.append(jobspec.parameters['JobName'])
                self.ms.commit()
            for jobspecid in ProdMgrUnitTests.__jobSpecId:
                print("handling jobspecid: " + str(jobspecid))
                reportFile = 'FrameworkJobReport.xml'
                report = readJobReport(reportFile)
                for fileinfo in report[-1].files:
                    if fileinfo['TotalEvents'] != None:
                        fileinfo['TotalEvents'] = 3
                report[-1].jobSpecId = jobspecid
                report[-1].status = "Success"
                reportLocation = self.jobReportDir + '/' + \
                    jobspecid.replace('/', '_') + ".xml"
                report[-1].write(reportLocation)
                self.ms.publish("GeneralJobFailure", reportLocation)
                self.ms.commit()

            ###shell start####
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID5?Priority=3')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID6?Priority=3')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:AddRequest",
                'https://localhost:8443/clarens/?Request_id=requestID7?Priority=3')
            self.ms.commit()
            self.ms.publish("ProdMgrInterface:ResourcesAvailable", '15')
            self.ms.commit()
            ###shell end ####
        except StandardError, ex:
            msg = "Failed testA\n"
            msg += str(ex)
            self.fail(msg)
class MessageServiceUnitTests(unittest.TestCase):
    """
    TestCase for MessageService module
    """

    def setUp(self):
        print """
        Message Service test with connections closed and forced refreshed
        connections inside transactions. See logfile for more details"""
        # define logging
        # create log handler
        logHandler = RotatingFileHandler("logfile", "a", 1000000, 3)
        logFormatter = logging.Formatter("%(asctime)s:%(message)s")
        logHandler.setFormatter(logFormatter)
        logging.getLogger().addHandler(logHandler)
        logging.getLogger().setLevel(logging.DEBUG)
        # create message service instance
        self.ms = MessageService()
        # create components
        self.ms.registerAs("Component1")
        self.ms.registerAs("Component2")
        # subscribe Component2 to messages of type MessageType1
        self.ms.subscribeTo("MessageType1")

    def testA(self):
        # purge messages
        print "Purging messages"
        self.ms.purgeMessages()

        # Component1 sends 10 messages
        self.ms.registerAs("Component1")
        print "Component1 sends messages: ",
        for index in range(10):
            self.ms.publish("MessageType1", str(index))
            print index,
        print ""
        self.ms.commit()

        # Component2 gets them
        self.ms.registerAs("Component2")
        print "Component2 gets: ",
        for index in range(10):
            type, payload = self.ms.get(wait = False)
            print payload,
        print ""
        self.ms.commit()

        # Close connection inside a transaction
        print "Close connection inside a transaction"
        self.ms.registerAs("Component1")
        print "Sending first message"
        self.ms.publish("MessageType1", "11")
        print "Closing connection!"
        self.ms.conn.close()
        print "Sending second message"
        self.ms.publish("MessageType1", "12")
        self.ms.commit()

        # Component2 should get both
        self.ms.registerAs("Component2")
        print "Component2 gets: ",
        for index in range(2):
            type, payload = self.ms.get(wait = False)
            print payload,
        print ""
        self.ms.commit()
        print "Transaction was recovered!"

        # Force refresh
        self.ms.refreshPeriod = 0
        print "Force a refresh event"
        self.ms.registerAs("Component1")
        print "Sending first message"
        self.ms.publish("MessageType1", "14")
        print "Sending second message"
        self.ms.publish("MessageType1", "15")
        print "Committing"
        self.ms.commit()
        print "Sending third message"
        self.ms.publish("MessageType1", "16")
        print "Committing"
        self.ms.commit()

        # Component2 should get all three
        self.ms.registerAs("Component2")
        print "Component2 gets: ",
        for index in range(3):
            type, payload = self.ms.get(wait = False)
            print payload,
        print ""
        self.ms.commit()
class RelValDatasets:
    """
    RelValDatasets

    Main class for querying and displaying the production status of the
    input datasets.
    """
    def __init__(self, **args):
        self.ms = MessageService()
        self.ms.registerAs("Test")
        self.datasets = []
        self.debug = False
        self.parameters = {}
        self.parameters['inputFile'] = None
        self.parameters.update(args)
        self.with_threads = False
        if self.parameters.get('dbsURL', None) is not None:
            self.dbs_url = self.parameters['dbsURL']
        else:
            self.getLocalDBS()
        self.getGlobalDBS()

    def readFile(self):
        """
        readFile()

        Reads the provided file. It produces a list of dictionaries; each
        entry contains:
          - name
          - expectedEvents
          - unmergedName
        """
        self.max_length = 0
        file = open(self.parameters['inputFile'], 'r')
        if self.debug:
            print "Parsing file: %s" % self.parameters['inputFile']
        for line in file:
            if line.startswith('#'):
                # Any comment?
                continue
            dataset = {}
            read_info = [x.strip() for x in line.strip("\n").strip().split()
                         if x != ""]
            if len(read_info) == 1:
                # No expected events found
                read_info = [9000, read_info[0]]
            if not read_info:
                # Blank line?
                continue
            dataset['name'] = read_info[1]
            if len(dataset['name']) > self.max_length:
                self.max_length = len(dataset['name'])
            dataset['expectedEvents'] = int(read_info[0])
            # Producing the unmerged dataset name
            unmerged_parts = [x for x in dataset['name'].split('/') if x != ""]
            dataset['unmergedName'] = "/".join(
                ['', unmerged_parts[0],
                 "-".join([unmerged_parts[1], 'unmerged']),
                 unmerged_parts[2]])
            self.datasets.append(dataset)
            if self.debug:
                print dataset
        file.close()

    def startQueryingService(self, threads=4):
        """
        startQueryingService

        Launches threads for querying DBS. The number of threads can be
        controlled by the threads argument. This is a pool of threads that
        pick up jobs from an input queue and store results in an output
        queue.
        """
        # Declare the queue which will later be used by the threads
        self.input_queue = Queue()
        # Queue where the results will be stored
        self.output_queue = Queue()
        self.lock = threading.Lock()
        self.threads = {}
        for i in range(threads):
            if self.debug:
                print "Starting querying service number", i
            self.threads[i] = DBSQueryThread(self.input_queue,
                                             self.output_queue, self.lock)
            self.threads[i].start()

    def areThreadsRuninng(self):
        """
        areThreadsRuninng

        Verifies whether the DBS threads are still running. It first checks
        whether the input queue is empty. A small sleep time is needed in
        order to let the threads run freely.
        """
        time.sleep(1)
        if self.input_queue.empty():
            for i in self.threads:
                if not self.threads[i].idle:
                    return True
            return False
        else:
            return True

    def killThreads(self):
        """
        killThreads

        Kill all the threads by setting their killing flag to True. It seems
        like this method is needed, otherwise I can't get the shell back
        after executing the threads. Anyway, I don't like to have threads
        running if I am not using them.
        """
        for i in self.threads:
            self.threads[i].kill_me = True

    def waitForDrainQueue(self):
        """
        waitForDrainQueue

        This method loops until the thread pool is done with the input work
        queue. It will also handle some interruptions. In case something goes
        wrong, this method will kill the running threads and then raise an
        exception.
        """
        try:
            while self.areThreadsRuninng():
                if self.debug:
                    print "Results so far:", self.output_queue.qsize()
                pass
        except KeyboardInterrupt, ex:
            self.killThreads()
            msg = "Execution cancelled by user."
            raise KeyboardInterrupt, msg
        except Exception, ex:
            self.killThreads()
            msg = "Unexpected exception:"
            msg += str(ex)
            raise Exception, msg
class CrabJobCreatorComponent:
    """
    _CrabJobCreatorComponent_

    """

    ################################
    # Standard Component Core Methods
    ################################

    def __init__(self, **args):
        self.args = {}
        self.args.setdefault('Logfile', None)
        self.args.setdefault('CacheDir', None)
        self.args.setdefault('ProxiesDir', None)
        self.args.setdefault('CopyTimeOut', '')

        # SE support parameters
        # Protocol = local cannot be the default. No default is allowed
        # for this parameter... it must be defined in the config file.
        self.args.setdefault('Protocol', '')
        self.args.setdefault('storageName', 'localhost')
        self.args.setdefault('storagePort', '')
        self.args.setdefault('storagePath', self.args["CacheDir"])

        # specific delegation strategy for glExec
        self.args.setdefault('glExecDelegation', 'false')

        self.args.setdefault('PollInterval', 60)
        self.args.setdefault("HeartBeatDelay", "00:05:00")
        self.args.update(args)
        if len(self.args["HeartBeatDelay"]) != 8:
            self.HeartBeatDelay = "00:05:00"
        else:
            self.HeartBeatDelay = self.args["HeartBeatDelay"]

        # define log file
        if self.args['Logfile'] == None:
            self.args['Logfile'] = os.path.join(self.args['ComponentDir'],
                                                "ComponentLog")
        # create log handler
        logHandler = RotatingFileHandler(self.args['Logfile'], "a", 1000000, 7)
        # define log format
        logFormatter = logging.Formatter("%(asctime)s:%(message)s")
        logHandler.setFormatter(logFormatter)
        logging.getLogger().addHandler(logHandler)
        logging.getLogger().setLevel(logging.INFO)

        ## volatile properties
        self.wdir = self.args['ComponentDir']
        self.maxThreads = int(self.args.get('maxThreads', 5))
        self.timePoolDB = self.args['PollInterval']

        # shared sessions
        self.blDBsession = BossLiteAPI('MySQL', dbConfig, makePool=True)
        self.sessionPool = self.blDBsession.bossLiteDB.getPool()
        self.workerPool = self.blDBsession.bossLiteDB.getPool()

        # Get configuration
        self.init = WMInit()
        self.init.setLogging()
        self.init.setDatabaseConnection(os.getenv("DATABASE"), \
                                        os.getenv('DIALECT'), os.getenv("DBSOCK"))

        self.myThread = threading.currentThread()
        self.factory = WMFactory("msgService", "WMCore.MsgService." + \
                                 self.myThread.dialect)
        self.newMsgService = self.myThread.factory['msgService'] \
            .loadObject("MsgService")
        self.ms = MessageService()
        self.workerCfg = {}
        logging.info(" ")
        logging.info("Starting component...")

    def startComponent(self):
        """
        _startComponent_

        Start up the component
        """
        # Registration in the old MsgService
        self.ms.registerAs("CrabJobCreatorComponent")
        self.ms.subscribeTo("JobFailed")
        self.ms.subscribeTo("JobSuccess")
        self.ms.subscribeTo("CrabJobCreatorComponent:EndDebug")

        # Registration in the new MsgService
        self.myThread.transaction.begin()
        self.newMsgService.registerAs("CrabJobCreatorComponent")
        self.myThread.transaction.commit()

        self.ms.subscribeTo("CrabJobCreatorComponent:HeartBeat")
        self.ms.remove("CrabJobCreatorComponent:HeartBeat")
        self.ms.publish("CrabJobCreatorComponent:HeartBeat", "", self.HeartBeatDelay)
        self.ms.commit()

        self.workerCfg = self.prepareBaseStatus()
        compWMObject = WMObject()
        manager = WorkerThreadManager(compWMObject)
        manager.addWorker(CrabJobCreatorPoller(self.workerCfg),
                          float(self.timePoolDB))

        #######################################
        try:
            while True:
                try:
                    event, payload = self.ms.get(wait = False)
                    if event is None:
                        time.sleep(self.ms.pollTime)
                        continue
                    else:
                        self.__call__(event, payload)
                        self.ms.commit()
                except Exception, exc:
                    logging.error("ERROR: Problem managing message...")
                    logging.error(str(exc))
        except Exception, e:
            logging.error(e)
            logging.info(traceback.format_exc())
        return
if collect['Scenario'] is None:
    msg = 'Scenario not provided.'
    print usage
    print msg
    sys.exit(1)

if collect['RunNumber'] is None:
    msg = 'You should provide --run.'
    print usage
    print msg
    sys.exit(1)

if plugin is not None and plugin not in valid_plugins:
    msg = 'Invalid plugin.'
    print usage
    print msg
    sys.exit(1)

ms = MessageService()
ms.registerAs("CLI")

if plugin is not None:
    print "Changing DQMInjector plugin to %s" % plugin
    ms.publish("DQMInjector:SetPlugin", str(plugin))
    ms.commit()

print "Publishing DQM workflow creation: %s" % str(collect)
ms.publish("DQMInjector:Collect", str(collect))
ms.commit()
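# Hedged usage note (the script name and the option spellings other than --run
# are assumptions for illustration): a matching invocation would look like
#
#   python injectDQMWorkflow.py --run=123456 --scenario=cosmics --plugin=RelVal
#
# which, if a plugin is given, first publishes DQMInjector:SetPlugin and then
# publishes DQMInjector:Collect carrying the stringified collect dictionary.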
# Load the workflow spec and ensure it has the TotalEvents and
# EventsPerJob settings in the parameters
if not os.path.exists(workflow):
    raise RuntimeError, "Workflow not found: %s" % workflow

workflowSpec = WorkflowSpec()
try:
    workflowSpec.load(workflow)
except Exception, ex:
    msg = "Unable to read workflow file:\n%s\n" % workflow
    msg += str(ex)
    raise RuntimeError, msg

ms = MessageService()
ms.registerAs("injectMCStreamerWorkflow")
ms.publish("NewWorkflow", workflow)
ms.commit()

#
# Load PA and T0 configs
#
paConfig = loadProdAgentConfiguration()
tier0Config = loadConfigurationFile(
    paConfig.getConfig("RunConfig")["OfflineConfDB"])

#
# Connect to T0AST
#
t0astDBConfig = paConfig.getConfig("Tier0DB")
t0astDBConn = Tier0DB.Tier0DB(t0astDBConfig,