# Manual test driver: starts a single WatchDog agent in a child process.
import sys

# The task type is taken from the first command-line argument and falls back
# to 'test' when the script is run without arguments.  Only IndexError can
# signal a missing argument, so nothing broader is caught.
# NOTE(review): argv is read before the pandajedi imports, exactly as in the
# original script -- presumably deliberate, so the order is preserved.
try:
    testTaskType = sys.argv[1]
except IndexError:
    testTaskType = 'test'

from pandajedi.jedicore.JediTaskBufferInterface import JediTaskBufferInterface
from pandajedi.jediddm.DDMInterface import DDMInterface
import multiprocessing
from pandajedi.jediorder import WatchDog

# task buffer interface handed to the agent
tbIF = JediTaskBufferInterface()
tbIF.setupInterface()

# DDM interface handed to the agent
ddmIF = DDMInterface()
ddmIF.setupInterface()

# only the child end of the pipe is passed on; the parent end is not used here
parent_conn, child_conn = multiprocessing.Pipe()

# NOTE(review): there is no ``if __name__ == '__main__'`` guard; under the
# 'spawn' start method the child re-imports the main module -- confirm this
# script is only run where 'fork' is the start method.
watchDog = multiprocessing.Process(target=WatchDog.launcher,
                                   args=(child_conn, tbIF, ddmIF,
                                         'atlas', testTaskType))
watchDog.start()
taskBufferIF = JediTaskBufferInterface() taskBufferIF.setupInterface() """ from pandajedi.jediconfig import jedi_config from pandajedi.jedicore import JediTaskBuffer taskBufferIF = JediTaskBuffer.JediTaskBuffer(None) from pandajedi.jediddm.DDMInterface import DDMInterface ddmIF = DDMInterface() ddmIF.setupInterface() ddmIF = ddmIF.getInterface("atlas") datasetName = "valid1.159025.ParticleGenerator_gamma_E100.recon.AOD.e3099_s2082_r6012_tid04635343_00" lostFiles = set(["AOD.04635343._000012.pool.root.1", "AOD.04635343._000015.pool.root.1"]) def resetStatusForLostFileRecovery(datasetName, lostFiles): # get jeditaskid varMap = {} varMap[":type1"] = "log" varMap[":type2"] = "output" varMap[":name1"] = datasetName varMap[":name2"] = datasetName.split(":")[-1] sqlGI = "SELECT jediTaskID,datasetID FROM {0}.JEDI_Datasets ".format(jedi_config.db.schemaJEDI) sqlGI += "WHERE type IN (:type1,:type2) AND datasetName IN (:name1,:name2) "
def start(self):
    """Launch all configured JEDI agents as child processes and wait for them.

    The zombie cleaner is started first, then the DDM and task-buffer
    interfaces are set up and handed to every agent.  Agents are spawned in a
    fixed order from the ``procConfig`` string of their ``jedi_config``
    section (entries separated by ``;`` and parsed with ``self.convParams``).
    Five seconds after the last spawn, any child that is no longer alive
    causes an error line to be printed and the whole process group to be
    killed with SIGKILL.  Otherwise the call blocks until every child exits.
    """
    # start zombi cleaner
    ZombiCleaner().start()

    # setup DDM I/F
    ddm_if = DDMInterface()
    ddm_if.setupInterface()

    # setup TaskBuffer I/F
    task_buffer_if = JediTaskBufferInterface()
    task_buffer_if.setupInterface()

    # setup intra-node message queue broker proxies (optional section)
    mq_config_file = getattr(getattr(jedi_config, 'mq', None), 'configFile', None)
    if mq_config_file:
        from pandajedi.jediorder.JediMsgProcessor import MsgProcAgent
        mq_agent = MsgProcAgent(config_file=mq_config_file)
        # NOTE(review): the returned proxies are not used below; the reference
        # is kept as in the original -- confirm whether it must stay alive.
        mb_proxy_dict = mq_agent.start_passive_mode(prefetch_size=999)

    # the list of JEDI knights (spawned child processes)
    knights = []

    # (jedi_config section, agent module) in launch order
    agent_table = (
        ('taskrefine', 'pandajedi.jediorder.TaskRefiner'),
        ('taskbroker', 'pandajedi.jediorder.TaskBroker'),
        ('confeeder', 'pandajedi.jediorder.ContentsFeeder'),
        ('jobgen', 'pandajedi.jediorder.JobGenerator'),
        ('postprocessor', 'pandajedi.jediorder.PostProcessor'),
        ('tcommando', 'pandajedi.jediorder.TaskCommando'),
        ('watchdog', 'pandajedi.jediorder.WatchDog'),
    )
    for section, module_name in agent_table:
        proc_config = getattr(jedi_config, section).procConfig
        for entry in proc_config.split(';'):
            params = self.convParams(entry)
            vo = params[0]
            plabel = params[1]
            n_proc = params[2]
            process_class = multiprocessing.Process
            extra_args = ()
            if section == 'jobgen':
                # JobGenerator additionally takes a cloud list and an
                # optional loop cycle, and runs inside ProcessWrapper
                cloud = params[3]
                try:
                    loop_cycle = params[4]
                except IndexError:
                    loop_cycle = None
                if not isinstance(cloud, list):
                    cloud = [cloud]
                process_class = ProcessWrapper
                extra_args = (cloud, True, True, loop_cycle)
            elif section == 'watchdog':
                # WatchDog takes two optional trailing parameters
                sub_str = params[3] if len(params) > 3 else None
                period = params[4] if len(params) > 4 else None
                extra_args = (sub_str, period)
            for _ in range(n_proc):
                parent_conn, child_conn = multiprocessing.Pipe()
                knight = process_class(
                    target=self.launcher,
                    args=(module_name, child_conn, task_buffer_if, ddm_if,
                          vo, plabel) + extra_args)
                knight.start()
                knights.append(knight)

    # setup JediMsgProcessor agent (only one system process)
    if getattr(getattr(jedi_config, 'msgprocessor', None), 'configFile', None):
        stop_event = multiprocessing.Event()
        self.stopEventList.append(stop_event)
        # NOTE(review): this pipe is created but not passed to the child
        parent_conn, child_conn = multiprocessing.Pipe()
        knight = multiprocessing.Process(
            target=self.launcher,
            args=('pandajedi.jediorder.JediMsgProcessor', stop_event))
        knight.start()
        knights.append(knight)

    # setup JediDaemon agent (only one system process)
    if getattr(getattr(jedi_config, 'daemon', None), 'enable', None):
        # NOTE(review): this pipe is created but not passed to the child
        parent_conn, child_conn = multiprocessing.Pipe()
        knight = multiprocessing.Process(
            target=self.launcher,
            args=('pandajedi.jediorder.JediDaemon', task_buffer_if, ddm_if))
        knight.start()
        knights.append(knight)

    # check initial failures: give the children a moment, then make sure
    # every one of them is still running
    time.sleep(5)
    for knight in knights:
        if knight.is_alive():
            continue
        time_now = datetime.datetime.utcnow()
        print(
            "{0} {1}: ERROR pid={2} died in initialization".format(
                str(time_now), self.__class__.__name__, knight.pid))
        # take down the whole process group, this process included
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # join
    for knight in knights:
        knight.join()
def start(self):
    """Launch all configured JEDI agents as child processes and wait for them.

    The zombie cleaner is started first, then the DDM and task-buffer
    interfaces are set up and handed to every agent.  Agents are spawned in a
    fixed order from the ``procConfig`` string of their ``jedi_config``
    section (entries separated by ``;`` and parsed with ``self.convParams``).
    Five seconds after the last spawn, any child that is no longer alive
    causes an error line to be printed and the whole process group to be
    killed with SIGKILL.  Otherwise the call blocks until every child exits.
    """
    # start zombi cleaner
    ZombiCleaner().start()

    # setup DDM I/F
    ddmIF = DDMInterface()
    ddmIF.setupInterface()

    # setup TaskBuffer I/F
    taskBufferIF = JediTaskBufferInterface()
    taskBufferIF.setupInterface()

    # the list of JEDI knights (spawned child processes)
    knightList = []

    # (jedi_config section, agent module) in launch order
    agentTable = (
        ('taskrefine', 'pandajedi.jediorder.TaskRefiner'),
        ('taskbroker', 'pandajedi.jediorder.TaskBroker'),
        ('confeeder', 'pandajedi.jediorder.ContentsFeeder'),
        ('jobgen', 'pandajedi.jediorder.JobGenerator'),
        ('postprocessor', 'pandajedi.jediorder.PostProcessor'),
        ('tcommando', 'pandajedi.jediorder.TaskCommando'),
        ('watchdog', 'pandajedi.jediorder.WatchDog'),
    )
    for section, moduleName in agentTable:
        procConfig = getattr(jedi_config, section).procConfig
        for itemStr in procConfig.split(';'):
            items = self.convParams(itemStr)
            vo = items[0]
            plabel = items[1]
            nProc = items[2]
            procClass = multiprocessing.Process
            extraArgs = ()
            if section == 'jobgen':
                # JobGenerator additionally takes a cloud list and runs
                # inside ProcessWrapper
                cloud = items[3]
                if not isinstance(cloud, list):
                    cloud = [cloud]
                procClass = ProcessWrapper
                extraArgs = (cloud, True, True)
            for iproc in range(nProc):
                parent_conn, child_conn = multiprocessing.Pipe()
                proc = procClass(target=self.launcher,
                                 args=(moduleName, child_conn, taskBufferIF,
                                       ddmIF, vo, plabel) + extraArgs)
                proc.start()
                knightList.append(proc)

    # check initial failures: give the children a moment, then make sure
    # every one of them is still running
    time.sleep(5)
    for knight in knightList:
        if not knight.is_alive():
            timeNow = datetime.datetime.utcnow()
            # single-argument call form: prints the same text under both
            # Python 2 and Python 3 (the bare print statement is a
            # SyntaxError on Python 3)
            print("{0} {1}: ERROR pid={2} died in initialization".format(
                str(timeNow), self.__class__.__name__, knight.pid))
            # take down the whole process group, this process included
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # join
    for knight in knightList:
        knight.join()
class PandaToJediMsgProcPlugin(BaseMsgProcPlugin):
    """Message-processor plugin that generates jobs for a task on request.

    Only messages whose ``msg_type`` is ``generate_job`` trigger work; any
    other type is logged at debug level and ignored.
    """

    def initialize(self):
        """Run the base initialisation, set up the DDM interface and build ``self.pid``."""
        BaseMsgProcPlugin.initialize(self)
        self.ddmIF = DDMInterface()
        self.ddmIF.setupInterface()
        # identifier made of short host name, process ID and process-group ID
        short_host = socket.getfqdn().split('.')[0]
        self.pid = '{0}-{1}_{2}-pjmsg'.format(short_host, os.getpid(), os.getpgrp())

    def process(self, msg_obj, decoded_data=None):
        """Handle one message.

        ``decoded_data`` is used as the message dictionary when given;
        otherwise ``msg_obj.data`` is decoded as JSON.  Any failure while
        decoding or running is logged and then re-raised.
        """
        # logger
        tmp_log = logger_utils.make_logger(base_logger, method_name='process')
        # start
        tmp_log.info('start')
        # parse
        if decoded_data is not None:
            msg_dict = decoded_data
        else:
            # json decode
            try:
                msg_dict = json.loads(msg_obj.data)
            except Exception as exc:
                tmp_log.error(
                    'failed to parse message json {2} , skipped. {0} : {1}'.format(
                        exc.__class__.__name__, exc, msg_obj.data))
                raise
        # run
        try:
            tmp_log.debug('got message {0}'.format(msg_dict))
            msg_type = msg_dict['msg_type']
            if msg_type != 'generate_job':
                tmp_log.debug('unknown message type : {}'.format(msg_type))
            else:
                self._generate_jobs(msg_dict, tmp_log)
        except Exception as exc:
            tmp_log.error(
                'failed to run, skipped. {0} : {1}'.format(exc.__class__.__name__, exc))
            raise
        # done
        tmp_log.info('done')

    def _generate_jobs(self, msg_dict, tmp_log):
        """Run one job-generation pass for the task named in ``msg_dict['taskid']``."""
        # get task to generate jobs
        task_id = int(msg_dict['taskid'])
        _, task_spec = self.tbIF.getTaskWithID_JEDI(task_id)
        if not task_spec:
            tmp_log.debug('unknown task {}'.format(task_id))
            return
        # get WQ
        vo = task_spec.vo
        source_label = task_spec.prodSourceLabel
        work_queue = self.tbIF.getWorkQueueMap().getQueueWithIDGshare(
            task_spec.workQueue_ID, task_spec.gshare)
        # get inputs
        task_inputs = self.tbIF.getTasksToBeProcessed_JEDI(
            self.pid, None, work_queue, None, None,
            nFiles=1000, target_tasks=[task_id])
        if not task_inputs:
            # nothing to process for this task
            return
        input_list = ListWithLock(task_inputs)
        # create thread and wait for it to finish
        thread_pool = ThreadPool()
        site_mapper = self.tbIF.getSiteMapper()
        task_setupper = TaskSetupper(vo, source_label)
        task_setupper.initializeMods(self.tbIF, self.ddmIF)
        generator = JobGeneratorThread(
            input_list, thread_pool, self.tbIF, self.ddmIF, site_mapper,
            True, task_setupper, self.pid, work_queue, 'pjmsg',
            None, None, None, False)
        generator.start()
        generator.join()