def run(self):
    """Launch ``self.cmd``, echo its output, and queue matching events.

    Steps, in order:

    1. Install ``self.kill`` as the SIGTERM and SIGINT handler.
    2. Start a thread running ``self.reporting``.
    3. Start the command with ``popen2.Popen4`` and remember its output
       stream (``self.input``) and pid (``self.pid``).
    4. Make a best-effort "start" report over a short-lived connection
       obtained from ``CONNECT.mon_client()``.
    5. Copy every line the command prints to stdout and offer the stripped
       line to each ``(regexp, match_func, act_func)`` triple in
       ``self.simulation_events``; when ``match_func`` succeeds, the pair
       ``(act_func, event_data)`` is pushed onto
       ``self.simulation_event_queue``.
    6. Once the output stream ends, queue ``self.report_exit`` together
       with the value returned by ``pipe.wait()`` and the current time,
       then wait for the reporting thread to finish.
    """
    # Function-scope import so this block does not rely on a file-level one.
    import errno

    # set up signal handling
    signal.signal(signal.SIGTERM, self.kill)
    signal.signal(signal.SIGINT, self.kill)

    # make the reporting thread
    self.reporting_thread = threading.Thread(target=self.reporting)
    self.reporting_thread.start()

    # launch the process and capture the pid
    pipe = popen2.Popen4(self.cmd)
    self.input, self.pid = pipe.fromchild, pipe.pid

    # report that we have started the program
    rpc_connection = None
    try:
        try:
            rpc_connection = CONNECT.mon_client()
            self.report_start(self.pid, rpc_connection, self.session)
        except Exception:
            print("MON_WRAPPER_ENGINE: Failed to report start")
    finally:
        # The connection is still None when mon_client() itself failed;
        # closing unconditionally would raise AttributeError here.
        if rpc_connection is not None:
            rpc_connection.close()

    # parse and pipe input
    while 1:
        try:
            line = self.input.readline()
        except IOError:
            error = sys.exc_info()[1]
            if getattr(error, "errno", None) == errno.EINTR:
                # Read was interrupted (presumably by one of the signals
                # handled above); just try again.
                continue
            # Any other read error will not fix itself; retrying would
            # spin forever, so treat it as the end of the output.
            print("MON_WRAPPER_ENGINE: Failed to read command output: %s" % error)
            break
        if line == "":
            break  # cmd died, break to find out why

        # Forward the line exactly as the command wrote it.
        sys.stdout.write(line)
        sys.stdout.flush()

        line = line.strip()
        for regexp, match_func, act_func in self.simulation_events:
            match = regexp.match(line)
            try:
                # match is None when the pattern did not match; match_func
                # is expected to raise in that case, which skips the event.
                event_data = match_func(match)
            except Exception:
                continue
            self.simulation_event_queue.Produce((act_func, event_data))

    # end of process detect exit code
    exit_code = pipe.wait()
    print("\ngot exit code %d - should exit in at most a couple of seconds" % exit_code)
    self.simulation_event_queue.Produce(
        (self.report_exit, (exit_code, time.time())))
    self.reporting_thread.join()
def reporting(self):
    """Reporting-thread loop: drain queued events and run poll functions.

    The loop wakes every half second. A polling pass happens once
    ``self.poll_interval`` seconds have accumulated, or on every wake-up
    when ``self.exited`` is true. Each pass:

    * reconnects through ``CONNECT.mon_client(10)`` when
      ``self.rpc_connection_active`` is false (on failure the connection
      state is reset and the thread sleeps 10 seconds before retrying);
    * calls every ``(event_function, event_data)`` taken from
      ``self.simulation_event_queue`` with the pid, connection, session and
      event data;
    * calls every function in ``self.polled_events`` with the pid,
      connection and session.

    The loop ends only when an event function raises
    ``EXIT_REPORTING_EXCEPTION``; the connection is then closed.

    NOTE(review): when an event fails, the connection is dropped and the
    remaining events of the same batch (and the poll functions) are still
    called with ``None`` as the connection, exactly as before this change.
    """
    try:
        while 1:
            time.sleep(.5)
            self.seconds_since_last_poll += .5
            if self.seconds_since_last_poll < self.poll_interval and not self.exited:
                continue
            self.seconds_since_last_poll = 0

            print("MON ------ Polling ----------------")
            try:
                if not self.rpc_connection_active:
                    print("MON ------- Connection inactive reconnecting ---------")
                    self.rpc_connection = CONNECT.mon_client(10)
                    self.rpc_connection_active = 1
            except Exception:
                print("MON ------ Failed to get rpc_connection for reporting --------")
                traceback.print_exc()
                # self.rpc_connection may be None at this point; the helper
                # copes with that instead of raising out of the handler.
                self._drop_rpc_connection()
                time.sleep(10)
            else:
                events = self.simulation_event_queue.Consume_All()
                for event_function, event_data in events:
                    try:
                        event_function(self.pid, self.rpc_connection,
                                       self.session, event_data)
                    except EXIT_REPORTING_EXCEPTION:
                        # Let the shutdown request reach the outer handler.
                        raise
                    except Exception:
                        print("MON ------ Error doing reporting event -------")
                        traceback.print_exc()
                        # Close the connection before forgetting it so it
                        # is not leaked; the next pass reconnects.
                        self._drop_rpc_connection()

                for poll_function in self.polled_events:
                    try:
                        poll_function(self.pid, self.rpc_connection, self.session)
                    except Exception:
                        print("MON ------ Error doing poll function ------")
                        traceback.print_exc()
    except EXIT_REPORTING_EXCEPTION:
        self._drop_rpc_connection()


def _drop_rpc_connection(self):
    """Close ``self.rpc_connection`` if there is one and mark it inactive."""
    connection = self.rpc_connection
    self.rpc_connection = None
    self.rpc_connection_active = 0
    if connection is None:
        return
    try:
        connection.close()
    except Exception:
        # Closing is best effort: the connection is being discarded anyway.
        traceback.print_exc()
run_directory = os.path.dirname(binary) print run_directory if run_directory == "." or run_directory == "": run_directory = os.getcwd() node_count = int(nodes_str) sim_command = binary + " " + " ".join(command) os.chdir(run_directory) ############################# # make a mon session ############################# from pd.common import CONNECT mon = None mon_sessiond = None try: mon = CONNECT.mon_client() mon_session = mon.Create_Session(os.environ["USER"])['id'] mon.Label_Session(mon_session, name) except: mon.close() else: mon.close() if not mon_session: print "Failed to register mon session, server possibly not running" sys.exit(1) ############################# # write the script file ############################# script = """#!/bin/tcsh cd %s