def _init_runner(self):
    """
    Initialize the roslaunch runner.

    Constructs ``self.runner`` (a ``roslaunch.launch.ROSLaunchRunner``) from the
    already-initialized config, process monitor and server, then prints a launch
    summary for the user.

    :raises: :exc:`RLException` if ``config``, ``pm`` or ``server`` has not been
      initialized yet
    """
    # fail fast: all three collaborators must exist before a runner can be built
    if self.config is None:
        raise RLException("config is not initialized")
    if self.pm is None:
        raise RLException("pm is not initialized")
    if self.server is None:
        raise RLException("server is not initialized")
    self.runner = roslaunch.launch.ROSLaunchRunner(
        self.run_id, self.config, server_uri=self.server.uri, pmon=self.pm,
        is_core=self.is_core, remote_runner=self.remote_runner,
        is_rostest=self.is_rostest)
    # print runner info to user, put errors last to make the more visible
    if self.is_core:
        print("ros_comm version %s" % (self.config.params['/rosversion'].value))
    # summary is "local" only when there is no remote runner involved
    print(self.config.summary(local=self.remote_runner is None))
    if self.config:
        for err in self.config.config_errors:
            printerrlog("WARNING: %s" % err)
def start(self):
    """
    Start the remote process by establishing an SSH connection to the
    remote host and exec'ing the child there.

    :returns: True if the SSH session was created successfully, False
      otherwise, ``bool``
    """
    # not considered started until the connection is fully set up
    self.started = False
    self.ssh = self.sshin = self.sshout = self.ssherr = None
    with self.lock:
        proc_name = self.name
        machine = self.machine
        if machine.user is not None:
            printlog("remote[%s]: creating ssh connection to %s:%s, user[%s]" % (proc_name, machine.address, machine.ssh_port, machine.user))
        else:
            printlog("remote[%s]: creating ssh connection to %s:%s" % (proc_name, machine.address, machine.ssh_port))
        command = ' '.join(self.args)
        _logger.info("remote[%s]: invoking with ssh exec args [%s]" % (proc_name, command))
        sshvals, msg = self._ssh_exec(command, machine.address, machine.ssh_port, machine.user, machine.password)
        if sshvals is None:
            # connection/exec failed; report and leave started == False
            printerrlog("remote[%s]: failed to launch on %s:\n\n%s\n\n" % (proc_name, machine.name, msg))
            return False
        self.ssh, self.sshin, self.sshout, self.ssherr = sshvals
        printlog("remote[%s]: ssh connection created" % proc_name)
        self.started = True
        return True
def start(self):
    """
    Start the remote process. This will create an SSH connection to the
    remote host and exec the roslaunch child with ``self.env``.

    :returns: True if the SSH session and remote process were started
      successfully, False otherwise, ``bool``
    """
    self.started = False  # won't set to True until we are finished
    self.ssh = self.sshin = self.sshout = self.ssherr = None
    # 'with' replaces the previous manual acquire()/release() pair: the lock
    # is now released on every exit path, and this matches the other start()
    # variant in this codebase
    with self.lock:
        name = self.name
        m = self.machine
        if m.user is not None:
            printlog("remote[%s]: creating ssh connection to %s:%s, user[%s]"%(name, m.address, m.ssh_port, m.user))
        else:
            printlog("remote[%s]: creating ssh connection to %s:%s"%(name, m.address, m.ssh_port))
        _logger.info("remote[%s]: invoking with ssh exec args [%s], env: %s"%(name, ' '.join(self.args), self.env))
        sshvals, msg = self._ssh_exec(' '.join(self.args), self.env, m.address, m.ssh_port, m.user, m.password)
        if sshvals is None:
            printerrlog("remote[%s]: failed to launch on %s:\n\n%s\n\n"%(name, m.name, msg))
            return False
        self.ssh, self.sshin, self.sshout, self.ssherr = sshvals
        printlog("remote[%s]: ssh connection created"%name)
        self.started = True
        return True
def _init_runner(self):
    """
    Initialize the roslaunch runner.

    Constructs ``self.runner`` from the already-initialized config, process
    monitor and server, then prints a launch summary for the user.

    :raises: :exc:`RLException` if ``config``, ``pm`` or ``server`` has not
      been initialized yet
    """
    if self.config is None:
        raise RLException("config is not initialized")
    if self.pm is None:
        raise RLException("pm is not initialized")
    if self.server is None:
        raise RLException("server is not initialized")
    self.runner = roslaunch.launch.ROSLaunchRunner(
        self.run_id,
        self.config,
        server_uri=self.server.uri,
        pmon=self.pm,
        is_core=self.is_core,
        remote_runner=self.remote_runner,
        is_rostest=self.is_rostest,
    )
    # print runner info to user, put errors last to make the more visible.
    # NOTE: converted Python-2 print statements to print() calls so this
    # module stays importable under Python 3 (the sibling copy of this
    # function already uses print()).
    if self.is_core:
        print("ros_comm version %s" % (self.config.params["/rosversion"].value))
    print(self.config.summary(local=self.remote_runner is None))
    if self.config:
        for err in self.config.config_errors:
            printerrlog("WARNING: %s" % err)
def _post_run(self): logger.info("ProcessMonitor._post_run %s" % self) # this is already true entering, but go ahead and make sure self.is_shutdown = True # killall processes on run exit q = Queue() q.join() with self.plock: # make copy of core_procs for threadsafe usage core_procs = self.core_procs[:] logger.info("ProcessMonitor._post_run %s: remaining procs are %s" % (self, self.procs)) # enqueue all non-core procs in reverse order for parallel kill # #526/885: ignore core procs [q.put(p) for p in reversed(self.procs) if not p in core_procs] # use 10 workers killers = [] for i in range(10): t = _ProcessKiller(q, i) killers.append(t) t.start() # wait for workers to finish q.join() shutdown_errors = [] # accumulate all the shutdown errors for t in killers: shutdown_errors.extend(t.errors) del killers[:] # #526/885: kill core procs last # we don't want to parallelize this as the master has to be last for p in reversed(core_procs): _kill_process(p, shutdown_errors) # delete everything except dead_list logger.info( "ProcessMonitor exit: cleaning up data structures and signals") with self.plock: del core_procs[:] del self.procs[:] del self.core_procs[:] reacquire_signals = self.reacquire_signals if reacquire_signals: reacquire_signals.clear() logger.info("ProcessMonitor exit: pmon has shutdown") self.done = True if shutdown_errors: printerrlog("Shutdown errors:\n" + '\n'.join([" * %s" % e for e in shutdown_errors]))
def _assume_failed(self, nodes, failed):
    """
    Utility routine for logging/recording nodes that failed

    :param nodes: list of nodes that are assumed to have failed, ``Node``
    :param failed: list of names of nodes that have failed to extend, ``[str]``
    """
    # record each node as "package/type" and report the whole batch at once
    str_nodes = ["%s/%s" % (node.package, node.type) for node in nodes]
    failed.extend(str_nodes)
    printerrlog("Launch of the following nodes most likely failed: %s" % ', '.join(str_nodes))
def _assume_failed(self, nodes, failed):
    """
    Utility routine for logging/recording nodes that failed

    :param nodes: list of nodes that are assumed to have failed, ``Node``
    :param failed: list of names of nodes that have failed to extend, ``[str]``
    """
    str_nodes = []
    for n in nodes:
        # identify each node by its "package/type" pair
        str_nodes.append("%s/%s" % (n.package, n.type))
    failed.extend(str_nodes)
    printerrlog("Launch of the following nodes most likely failed: %s" % ', '.join(str_nodes))
def _post_run(self): logger.info("ProcessMonitor._post_run %s" % self) # this is already true entering, but go ahead and make sure self.is_shutdown = True # killall processes on run exit q = Queue() q.join() with self.plock: # make copy of core_procs for threadsafe usage core_procs = self.core_procs[:] logger.info("ProcessMonitor._post_run %s: remaining procs are %s" % (self, self.procs)) # enqueue all non-core procs in reverse order for parallel kill # #526/885: ignore core procs [q.put(p) for p in reversed(self.procs) if not p in core_procs] # use 10 workers killers = [] for i in range(10): t = _ProcessKiller(q, i) killers.append(t) t.start() # wait for workers to finish q.join() shutdown_errors = [] # accumulate all the shutdown errors for t in killers: shutdown_errors.extend(t.errors) del killers[:] # #526/885: kill core procs last # we don't want to parallelize this as the master has to be last for p in reversed(core_procs): _kill_process(p, shutdown_errors) # delete everything except dead_list logger.info("ProcessMonitor exit: cleaning up data structures and signals") with self.plock: del core_procs[:] del self.procs[:] del self.core_procs[:] reacquire_signals = self.reacquire_signals if reacquire_signals: reacquire_signals.clear() logger.info("ProcessMonitor exit: pmon has shutdown") self.done = True if shutdown_errors: printerrlog("Shutdown errors:\n" + "\n".join([" * %s" % e for e in shutdown_errors]))
def relaunch_nodes(self):
    """
    Re-send the previously stored launch XML (``self.nodes_xml``) to the
    remote roslaunch child.

    :returns: two lists: names of nodes that launched successfully and
      names of nodes that failed, ``([str], [str])``
    """
    succeeded = []
    failed = []
    api = self.getapi()
    if self.nodes_xml is not None:
        try:
            _logger.debug("sending [%s] XML [\n%s\n]" % (self.uri, self.nodes_xml))
            code, msg, val = api.launch(self.nodes_xml)
            if code == 1:
                c_succ, c_fail = val
                succeeded.extend(c_succ)
                failed.extend(c_fail)
            else:
                printerrlog('error launching on [%s, uri %s]: %s' % (self.name, self.uri, msg))
        except socket.gaierror:
            # must be caught BEFORE socket.error: gaierror is a subclass of
            # OSError/socket.error, so the previous handler order made this
            # branch unreachable. usually errno == -2. See #815.
            # (also fixed: this handler referenced an undefined name 'child';
            # it now uses self.uri)
            child_host, _ = network.parse_http_host_and_port(self.uri)
            printerrlog(
                "Unable to contact remote roslaunch at [%s]. This is most likely due to a network misconfiguration with host lookups. Please make sure that you can contact '%s' from this machine"
                % (self.uri, child_host))
        except socket.error as e:
            # Python 3: OSError cannot be tuple-unpacked ('errno, msg = e'
            # raised TypeError); use the strerror attribute instead
            printerrlog('error launching on [%s, uri %s]: %s' % (self.name, self.uri, str(e.strerror)))
        except Exception as e:
            printerrlog('error launching on [%s, uri %s]: %s' % (self.name, self.uri, str(e)))
    else:
        _logger.error("xml provided is not defined")
    return succeeded, failed
def test_printlog(self):
    """Exercise printlog/printerrlog handler registration and dispatch."""
    from roslaunch.core import add_printlog_handler, add_printerrlog_handler, printlog, printlog_bold, printerrlog
    add_printlog_handler(printlog_cb)
    add_printlog_handler(printlog_cb_exception)
    add_printerrlog_handler(printlog_cb)
    add_printerrlog_handler(printlog_cb_exception)
    # can't really test functionality, just make sure it doesn't crash;
    # printlog_cb records the last message into the module-global _lastmsg
    global _lastmsg
    _lastmsg = None
    printlog('foo')
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual
    self.assertEqual('foo', _lastmsg)
    printlog_bold('bar')
    self.assertEqual('bar', _lastmsg)
    printerrlog('baz')
    self.assertEqual('baz', _lastmsg)
def stop(self, errors=None):
    """
    Terminate this process, including the SSH connection.

    :param errors: accumulator for error messages; kept for API
      compatibility (not appended to by this implementation), ``[str]``
    """
    if errors is None:
        errors = []
    # 'with' replaces the manual acquire()/release() pair so the lock is
    # always released, even on early return or unexpected exceptions
    with self.lock:
        if not self.ssh:
            return
        # call the shutdown API first as closing the SSH connection
        # won't actually kill the process unless it triggers SIGPIPE
        try:
            api = self.getapi()
            if api is not None:
                # TODO: probably need a timeout on this
                api.shutdown()
        except socket.error:
            # normal if process is already dead
            address = self.machine.address
            if not self.is_dead:
                printerrlog("remote[%s]: unable to contact [%s] to shutdown remote processes!" % (self.name, address))
            else:
                printlog("remote[%s]: unable to contact [%s] to shutdown cleanly. The remote roslaunch may have exited already." % (self.name, address))
        except Exception:
            # narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
            # still propagate. temporary: don't really want to log here as
            # this may occur during shutdown
            traceback.print_exc()
        _logger.info("remote[%s]: closing ssh connection", self.name)
        self.sshin.close()
        self.sshout.close()
        self.ssherr.close()
        self.ssh.close()
        self.sshin = None
        self.sshout = None
        self.ssherr = None
        self.ssh = None
        _logger.info("remote[%s]: ssh connection closed", self.name)
def is_alive(self):
    """
    @return: True if the process is alive. is_alive needs to be called
    periodically as it drains the SSH buffer
    @rtype: bool
    """
    # ssh handle gone after a successful start() means the connection died
    if self.started and not self.ssh:
        return False
    elif not self.started:
        return True #not started is equivalent to alive in our logic
    s = self.ssherr
    # settimeout(0) puts the channel in non-blocking mode so the reads
    # below return immediately instead of waiting for data
    s.channel.settimeout(0)
    try:
        #drain the pipes
        data = s.read(2048)
        # zero-length read on the stderr channel means EOF -> process died
        if not len(data):
            self.is_dead = True
            return False
        # #2012 il8n: ssh *should* be UTF-8, but often isn't
        # (e.g. Japan)
        data = data.decode('utf-8')
        printerrlog("remote[%s]: %s"%(self.name, data))
    except socket.timeout:
        pass
    except IOError:
        return False
    except UnicodeDecodeError:
        # #2012: soft fail, printing is not essential. This occurs
        # with older machines that don't send utf-8 over ssh
        pass
    s = self.sshout
    s.channel.settimeout(0)
    try:
        #drain the pipes
        #TODO: write to log file
        data = s.read(2048)
        if not len(data):
            self.is_dead = True
            return False
        # data = data.decode('utf-8')
        #print "DATA", data
    except socket.timeout:
        pass
    except IOError:
        return False
    return True
def launch_remote_nodes(self):
    """
    Contact each child to launch remote nodes

    :returns: lists of node names that succeeded and failed, ``([str], [str])``
    """
    succeeded = []
    failed = []
    # initialize remote_nodes. we use the machine config key as
    # the key for the dictionary so that we can bin the nodes.
    self.remote_nodes = {}
    for m in self.machine_list:
        self.remote_nodes[m.config_key()] = []
    # build list of nodes that will be launched by machine
    nodes = [x for x in self.rosconfig.nodes if not is_machine_local(x.machine)]
    for n in nodes:
        self.remote_nodes[n.machine.config_key()].append(n)
    for child in self.remote_processes:
        nodes = self.remote_nodes[child.machine.config_key()]
        body = '\n'.join([n.to_remote_xml() for n in nodes])
        # #3799: force utf-8 encoding
        xml = '<?xml version="1.0" encoding="utf-8"?>\n<launch>\n%s</launch>'%body
        api = child.getapi()
        # TODO: timeouts
        try:
            self.logger.debug("sending [%s] XML [\n%s\n]"%(child.uri, xml))
            code, msg, val = api.launch(xml)
            if code == 1:
                c_succ, c_fail = val
                succeeded.extend(c_succ)
                failed.extend(c_fail)
            else:
                printerrlog('error launching on [%s, uri %s]: %s'%(child.name, child.uri, msg))
                self._assume_failed(nodes, failed)
        except socket.error as e:
            # NOTE(review): tuple-unpacking an OSError raises TypeError on
            # Python 3 -- e.args / e.strerror would be needed here
            errno, msg = e
            printerrlog('error launching on [%s, uri %s]: %s'%(child.name, child.uri, str(msg)))
            self._assume_failed(nodes, failed)
        except socket.gaierror as e:
            # NOTE(review): unreachable -- gaierror is a subclass of
            # socket.error, so the handler above catches it first
            errno, msg = e
            # usually errno == -2. See #815.
            child_host, _ = network.parse_http_host_and_port(child.uri)
            printerrlog("Unable to contact remote roslaunch at [%s]. This is most likely due to a network misconfiguration with host lookups. Please make sure that you can contact '%s' from this machine"%(child.uri, child_host))
            self._assume_failed(nodes, failed)
        except Exception as e:
            printerrlog('error launching on [%s, uri %s]: %s'%(child.name, child.uri, str(e)))
            self._assume_failed(nodes, failed)
    return succeeded, failed
def stop(self, errors=None):
    """
    Terminate this process, including the SSH connection.

    :param errors: accumulator for error messages (not appended to by this
      implementation), ``[str]``
    """
    if errors is None:
        errors = []
    try:
        self.lock.acquire()
        # nothing to do if the connection was never established / already closed
        if not self.ssh:
            return
        # call the shutdown API first as closing the SSH connection
        # won't actually kill the process unless it triggers SIGPIPE
        try:
            api = self.getapi()
            if api is not None:
                #TODO: probably need a timeout on this
                api.shutdown()
        except socket.error:
            # normal if process is already dead
            address, port = self.machine.address, self.machine.ssh_port
            if not self.is_dead:
                printerrlog("remote[%s]: unable to contact [%s] to shutdown remote processes!"%(self.name, address))
            else:
                printlog("remote[%s]: unable to contact [%s] to shutdown cleanly. The remote roslaunch may have exited already."%(self.name, address))
        except:
            # temporary: don't really want to log here as this
            # may occur during shutdown
            traceback.print_exc()
        # tear down all four SSH channel handles and forget them
        _logger.info("remote[%s]: closing ssh connection", self.name)
        self.sshin.close()
        self.sshout.close()
        self.ssherr.close()
        self.ssh.close()
        self.sshin = None
        self.sshout = None
        self.ssherr = None
        self.ssh = None
        _logger.info("remote[%s]: ssh connection closed", self.name)
    finally:
        self.lock.release()
def log(self, client, level, message):
    """
    Report a log message to the server
    @param client: name of client
    @type client: str
    @param level: log level (uses rosgraph_msgs.msg.Log levels)
    @type level: int
    @param message: message to log
    @type message: str
    """
    try:
        formatted = "[%s]: %s" % (client, message)
        if level >= Log.ERROR:
            printerrlog(formatted)
        elif 'started with pid' in message:
            # hack due to the fact that we only have one INFO level:
            # highlight process-start messages
            printlog_bold(formatted)
        else:
            printlog(formatted)
    except:
        # can't trust the logging system at this point, so just dump to screen
        traceback.print_exc()
    # presumably the XML-RPC (code, statusMessage, value) triple -- confirm
    return 1, '', 1
def log(self, client, level, message):
    """
    Report a log message to the server
    @param client: name of client
    @type client: str
    @param level: log level (uses roslib.msg.Log levels)
    @type level: int
    @param message: message to log
    @type message: str
    @return: XML-RPC style (code, statusMessage, value) triple
    """
    try:
        if level >= Log.ERROR:
            printerrlog("[%s]: %s"%(client, message))
        else:
            #hack due to the fact that we only have one INFO level
            if 'started with pid' in message:
                # highlight process-start messages so the user can spot them
                printlog_bold("[%s]: %s"%(client, message))
            else:
                printlog("[%s]: %s"%(client, message))
    except:
        # can't trust the logging system at this point, so just dump to screen
        traceback.print_exc()
    return 1, '', 1
def launch_remote_nodes(self):
    """
    Contact each child to launch remote nodes.

    :returns: lists of node names that launched successfully and that
      failed, ``([str], [str])``
    """
    succeeded = []
    failed = []
    # initialize remote_nodes. we use the machine config key as
    # the key for the dictionary so that we can bin the nodes.
    self.remote_nodes = {}
    for m in self.machine_list:
        self.remote_nodes[m.config_key()] = []
    # build list of nodes that will be launched by machine
    nodes = [x for x in self.rosconfig.nodes if not is_machine_local(x.machine)]
    for n in nodes:
        self.remote_nodes[n.machine.config_key()].append(n)
    for child in self.remote_processes:
        nodes = self.remote_nodes[child.machine.config_key()]
        body = '\n'.join([n.to_remote_xml() for n in nodes])
        # force utf-8 encoding, #3799
        xml = '<?xml version="1.0" encoding="utf-8"?>\n<launch>\n%s</launch>' % body
        api = child.getapi()
        # TODO: timeouts
        try:
            self.logger.debug("sending [%s] XML [\n%s\n]" % (child.uri, xml))
            code, msg, val = api.launch(xml)
            if code == 1:
                c_succ, c_fail = val
                succeeded.extend(c_succ)
                failed.extend(c_fail)
            else:
                printerrlog('error launching on [%s, uri %s]: %s' % (child.name, child.uri, msg))
                self._assume_failed(nodes, failed)
        except socket.gaierror:
            # caught BEFORE socket.error: gaierror is a subclass and would
            # otherwise be swallowed by the generic handler.
            # usually errno == -2. See #815.
            child_host, _ = network.parse_http_host_and_port(child.uri)
            printerrlog(
                "Unable to contact remote roslaunch at [%s]. This is most likely due to a network misconfiguration with host lookups. Please make sure that you can contact '%s' from this machine"
                % (child.uri, child_host))
            self._assume_failed(nodes, failed)
        except socket.error as e:
            # Python 3: 'except E, (errno, msg)' is a SyntaxError and OSError
            # cannot be tuple-unpacked; use the strerror attribute
            printerrlog('error launching on [%s, uri %s]: %s' % (child.name, child.uri, str(e.strerror)))
            self._assume_failed(nodes, failed)
        except Exception as e:
            # added catch-all + return for consistency with the sibling
            # implementation of this method
            printerrlog('error launching on [%s, uri %s]: %s' % (child.name, child.uri, str(e)))
            self._assume_failed(nodes, failed)
    return succeeded, failed
def _run(self):
    """
    Internal run loop of ProcessMonitor: polls every registered process,
    reports deaths, triggers shutdown when a required process dies, and
    respawns processes whose respawn delay has elapsed.
    """
    plock = self.plock
    dead = []
    respawn = []
    # NOTE(review): "mirko hack" looks like leftover debug logging while
    # busy-waiting for registrations to complete -- confirm before removing
    while not self._registrations_complete:
        logger.info("mirko hack")
        time.sleep(0.1) #yield thread
    while not self.is_shutdown:
        with plock: #copy self.procs
            procs = self.procs[:]
        if self.is_shutdown:
            break
        # check current signal handlers to see if children have stolen them away
        # TODO: this code may not be necessary anymore (have to test)
        for s in _signal_list:
            if signal.getsignal(s) != rl_signal:
                self.reacquire_signals.add(s)
        for p in procs:
            try:
                if not p.is_alive():
                    logger.debug(
                        "Process[%s] has died, respawn=%s, required=%s, exit_code=%s",
                        p.name,
                        "True(%f)" % p.respawn_delay if p.respawn else p.respawn,
                        p.required, p.exit_code)
                    exit_code_str = p.get_exit_description()
                    if p.required:
                        # a required process dying takes the whole launch down
                        printerrlog(
                            '=' * 80 + "REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n" % (p.name, exit_code_str) + '=' * 80)
                        self.is_shutdown = True
                    elif not p in respawn:
                        # nonzero exit code is reported as an error
                        if p.exit_code:
                            printerrlog("[%s] %s" % (p.name, exit_code_str))
                        else:
                            printlog_bold("[%s] %s" % (p.name, exit_code_str))
                        dead.append(p)
                    ## no need for lock as we require listeners be
                    ## added before process monitor is launched
                    for l in self.listeners:
                        l.process_died(p.name, p.exit_code)
            except Exception as e:
                traceback.print_exc()
                #don't respawn as this is an internal error
                dead.append(p)
        if self.is_shutdown:
            break #stop polling
        for d in dead:
            try:
                # when should_respawn() returns 0.0, bool(0.0) evaluates to False
                # work around this by checking if the return value is False
                if d.should_respawn() is not False:
                    respawn.append(d)
                else:
                    self.unregister(d)
                    # stop process, don't accumulate errors
                    d.stop([])
                    # save process data to dead list
                    with plock:
                        self.dead_list.append(DeadProcess(d))
            except:
                logger.error(traceback.format_exc())
        # dead check is to make sure that ProcessMonitor at least
        # waits until its had at least one process before exiting
        if self._registrations_complete and dead and not self.procs and not respawn:
            printlog(
                "all processes on machine have died, roslaunch will exit")
            self.is_shutdown = True
        del dead[:]
        _respawn = []
        for r in respawn:
            try:
                if self.is_shutdown:
                    break
                # respawn delay expired -> restart; otherwise keep waiting
                if r.should_respawn() <= 0.0:
                    printlog("[%s] restarting process" % r.name)
                    # stop process, don't accumulate errors
                    r.stop([])
                    r.start()
                else:
                    # not ready yet, keep it around
                    _respawn.append(r)
            except:
                traceback.print_exc()
                logger.error("Restart failed %s", traceback.format_exc())
        respawn = _respawn
        time.sleep(0.1) #yield thread
def _run(self):
    """
    Internal run loop of ProcessMonitor: polls registered processes for
    liveness, notifies listeners of deaths, shuts down on a required
    process's death, and respawns eligible processes.
    """
    plock = self.plock
    dead = []
    respawn = []
    while not self.is_shutdown:
        with plock:
            # copy self.procs
            procs = self.procs[:]
        if self.is_shutdown:
            break
        # check current signal handlers to see if children have stolen them away
        # TODO: this code may not be necessary anymore (have to test)
        for s in _signal_list:
            if signal.getsignal(s) != rl_signal:
                self.reacquire_signals.add(s)
        for p in procs:
            try:
                if not p.is_alive():
                    logger.debug(
                        "Process[%s] has died, respawn=%s, required=%s, exit_code=%s",
                        p.name,
                        "True(%f)" % p.respawn_delay if p.respawn else p.respawn,
                        p.required,
                        p.exit_code,
                    )
                    exit_code_str = p.get_exit_description()
                    if p.required:
                        # a required process dying takes the whole launch down
                        printerrlog(
                            "=" * 80 + "REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n" % (p.name, exit_code_str) + "=" * 80
                        )
                        self.is_shutdown = True
                    elif not p in respawn:
                        if p.exit_code:
                            printerrlog("[%s] %s" % (p.name, exit_code_str))
                        else:
                            printlog_bold("[%s] %s" % (p.name, exit_code_str))
                        dead.append(p)
                    ## no need for lock as we require listeners be
                    ## added before process monitor is launched
                    for l in self.listeners:
                        l.process_died(p.name, p.exit_code)
            except Exception as e:
                traceback.print_exc()
                # don't respawn as this is an internal error
                dead.append(p)
        if self.is_shutdown:
            break # stop polling
        for d in dead:
            try:
                # NOTE(review): truthiness check -- a respawn delay of 0.0 is
                # falsy and will unregister rather than respawn; the sibling
                # copy of this loop uses 'is not False' to handle that case
                if d.should_respawn():
                    respawn.append(d)
                else:
                    self.unregister(d)
                    # stop process, don't accumulate errors
                    d.stop([])
                    # save process data to dead list
                    with plock:
                        self.dead_list.append(DeadProcess(d))
            except:
                logger.error(traceback.format_exc())
        # dead check is to make sure that ProcessMonitor at least
        # waits until its had at least one process before exiting
        if self._registrations_complete and dead and not self.procs and not respawn:
            printlog("all processes on machine have died, roslaunch will exit")
            self.is_shutdown = True
        del dead[:]
        _respawn = []
        for r in respawn:
            try:
                if self.is_shutdown:
                    break
                if r.should_respawn() <= 0.0:
                    printlog("[%s] restarting process" % r.name)
                    # stop process, don't accumulate errors
                    r.stop([])
                    r.start()
                else:
                    # not ready yet, keep it around
                    _respawn.append(r)
            except:
                traceback.print_exc()
                logger.error("Restart failed %s", traceback.format_exc())
        respawn = _respawn
        time.sleep(0.1) # yield thread
class ROSRemoteRunner(roslaunch.launch.ROSRemoteRunnerIF):
    """
    Manages the running of remote roslaunch children

    NOTE(review): this class still contains Python-2-only syntax
    (``print`` statements and ``except E, (a, b):`` clauses) and will not
    import under Python 3 -- confirm the target interpreter before use.
    """

    def __init__(self, run_id, rosconfig, pm, server):
        """
        @param run_id: roslaunch run_id of this runner
        @type run_id: str
        @param rosconfig: launch configuration
        @type rosconfig: L{ROSConfig}
        @param pm: process monitor
        @type pm: L{ProcessMonitor}
        @param server: roslaunch parent server
        @type server: L{ROSLaunchParentNode}
        """
        self.run_id = run_id
        self.rosconfig = rosconfig
        self.server = server
        self.pm = pm
        self.logger = logging.getLogger('roslaunch.remote')
        # process listeners; must be registered before children start
        self.listeners = []
        # populated by start_children()
        self.machine_list = []
        self.remote_processes = []

    def add_process_listener(self, l):
        """
        Listen to events about remote processes dying. Not threadsafe.
        Must be called before processes started.
        @param l: ProcessListener
        @type l: L{ProcessListener}
        """
        self.listeners.append(l)

    def _start_child(self, server_node_uri, machine, counter):
        """
        Start a single remote roslaunch child over SSH and register it
        with the process monitor and parent server.
        """
        # generate a name for the machine. don't use config key as
        # it's too long to easily display
        name = "%s-%s" % (machine.address, counter)
        self.logger.info("remote[%s] starting roslaunch", name)
        # NOTE(review): Python-2 print statement
        print "remote[%s] starting roslaunch" % name
        env_dict = setup_env(None, machine, self.rosconfig.master.uri)
        p = SSHChildROSLaunchProcess(self.run_id, name, server_node_uri, env_dict, machine)
        success = p.start()
        self.pm.register(p)
        if not success:
            #treat as fatal
            raise RLException("unable to start remote roslaunch child: %s" % name)
        self.server.add_child(name, p)
        return p

    def start_children(self):
        """
        Start the child roslaunch processes, one per unique remote
        machine, then wait for each to register back (subject to each
        machine's timeout).
        """
        server_node_uri = self.server.uri
        if not server_node_uri:
            raise RLException("server URI is not initialized")
        # TODOXXX: break out table building code into a separate
        # routine so we can unit test it _start_child() should not be
        # determining the process name
        # Build table of unique machines that we are going to launch on
        machines = {}
        for n in self.rosconfig.nodes:
            if not is_machine_local(n.machine):
                machines[n.machine.config_key()] = n.machine
        # Launch child roslaunch processes on remote machines
        counter = 0
        # - keep a list of procs so we can check for those that failed to launch
        procs = []
        for m in machines:
            p = self._start_child(server_node_uri, machines[m], counter)
            procs.append(p)
            counter += 1
        # Wait for all children to call register() callback. The machines can have
        # non-uniform registration timeouts. We consider the failure to occur once
        # one of the machines has failed to meet it's timeout.
        start_t = time.time()
        while True:
            pending = []
            for p in procs:
                if not p.is_alive():
                    raise RLException("remote roslaunch failed to launch: %s" % p.machine.name)
                elif not p.uri:
                    pending.append(p.machine)
            if not pending:
                break
            # timeout is the minimum of the remaining timeouts of the machines
            timeout_t = start_t + min([m.timeout for m in pending])
            if time.time() > timeout_t:
                break
            time.sleep(0.1)
        if pending:
            raise RLException(
                """The following roslaunch remote processes failed to register:
%s
If this is a network latency issue, you may wish to consider setting
  <machine timeout="NUMBER OF SECONDS" ... />
in your launch""" % '\n'.join([" * %s (timeout %ss)" % (m.name, m.timeout) for m in pending]))
        # convert machine dictionary to a list
        self.machine_list = machines.values()
        # save a list of the remote processes
        self.remote_processes = procs

    def _assume_failed(self, nodes, failed):
        """
        Utility routine for logging/recording nodes that failed
        @param nodes: list of nodes that are assumed to have failed
        @type nodes: [L{Node}]
        @param failed: list of names of nodes that have failed to extend
        @type failed: [str]
        """
        str_nodes = ["%s/%s" % (n.package, n.type) for n in nodes]
        failed.extend(str_nodes)
        printerrlog("Launch of the following nodes most likely failed: %s" % ', '.join(str_nodes))

    def launch_remote_nodes(self):
        """
        Contact each child to launch remote nodes
        """
        succeeded = []
        failed = []
        # initialize remote_nodes. we use the machine config key as
        # the key for the dictionary so that we can bin the nodes.
        self.remote_nodes = {}
        for m in self.machine_list:
            self.remote_nodes[m.config_key()] = []
        # build list of nodes that will be launched by machine
        nodes = [x for x in self.rosconfig.nodes if not is_machine_local(x.machine)]
        for n in nodes:
            self.remote_nodes[n.machine.config_key()].append(n)
        for child in self.remote_processes:
            nodes = self.remote_nodes[child.machine.config_key()]
            body = '\n'.join([n.to_remote_xml() for n in nodes])
            # force utf-8 encoding, #3799
            xml = '<?xml version="1.0" encoding="utf-8"?>\n<launch>\n%s</launch>' % body
            if 0:
                # NOTE(review): dead debug branch (Python-2 print statement)
                print xml
            api = child.getapi()
            # TODO: timeouts
            try:
                self.logger.debug("sending [%s] XML [\n%s\n]" % (child.uri, xml))
                code, msg, val = api.launch(xml)
                if code == 1:
                    c_succ, c_fail = val
                    succeeded.extend(c_succ)
                    failed.extend(c_fail)
                else:
                    printerrlog('error launching on [%s, uri %s]: %s' % (child.name, child.uri, msg))
                    self._assume_failed(nodes, failed)
            # NOTE(review): Python-2 except syntax below; also gaierror is a
            # subclass of socket.error so its handler is unreachable
            except socket.error, (errno, msg):
                printerrlog('error launching on [%s, uri %s]: %s' % (child.name, child.uri, str(msg)))
                self._assume_failed(nodes, failed)
            except socket.gaierror, (errno, msg):
                # usually errno == -2. See #815.
                child_host, _ = network.parse_http_host_and_port(child.uri)
                printerrlog(
                    "Unable to contact remote roslaunch at [%s]. This is most likely due to a network misconfiguration with host lookups. Please make sure that you can contact '%s' from this machine" % (child.uri, child_host))
                self._assume_failed(nodes, failed)
            except Exception, e:
                printerrlog('error launching on [%s, uri %s]: %s' % (child.name, child.uri, str(e)))
                self._assume_failed(nodes, failed)
def launch_main(argv=sys.argv, real_args=None, fn_to_call=None):
    """
    Main entry point for the roslaunch command-line tool.

    Parses options, handles the informational sub-modes (node args/list,
    param dump, file list), then runs either as a roslaunch child (XML-RPC
    client of a remote parent) or as a parent that launches processes.

    :param argv: command-line arguments, ``[str]``
    :param real_args: arguments forwarded to ``fn_to_call`` (if given)
    :param fn_to_call: optional callable invoked after the parent starts,
      in place of spinning; presumably for embedding -- confirm with callers
    """
    options = None
    logger = None
    try:
        from roslaunch import rlutil
        parser = roslaunch._get_optparse()
        (options, args) = parser.parse_args(argv[1:])
        args = rlutil.resolve_launch_arguments(args)
        roslaunch._validate_args(parser, options, args)
        # node args doesn't require any roslaunch infrastructure, so process it first
        if any([
                options.node_args, options.node_list, options.find_node,
                options.dump_params, options.file_list, options.ros_args
        ]):
            if options.node_args and not args:
                parser.error("please specify a launch file")
            from roslaunch import node_args
            if options.node_args:
                node_args.print_node_args(options.node_args, args)
            elif options.find_node:
                node_args.print_node_filename(options.find_node, args)
            # Dump parameters, #2685
            elif options.dump_params:
                roslaunch_param_dump.dump_params(args)
            elif options.file_list:
                rlutil.print_file_list(args)
            elif options.ros_args:
                import arg_dump as roslaunch_arg_dump
                roslaunch_arg_dump.dump_args(args)
            else:
                node_args.print_node_list(args)
            return
        # we have to wait for the master here because we don't have the run_id yet
        if options.wait_for_master:
            if options.core:
                parser.error("--wait cannot be used with roscore")
            rlutil._wait_for_master()
        # write the pid to a file
        roslaunch.write_pid_file(options.pid_fn, options.core, options.port)
        # spin up the logging infrastructure. have to wait until we can read options.run_id
        uuid = rlutil.get_or_generate_uuid(options.run_id, options.wait_for_master)
        roslaunch.configure_logging(uuid)
        # #3088: don't check disk usage on remote machines
        if not options.child_name and not options.skip_log_check:
            # #2761
            rlutil.check_log_disk_usage()
        logger = logging.getLogger('roslaunch')
        logger.info("roslaunch starting with args %s" % str(argv))
        logger.info("roslaunch env is %s" % os.environ)
        if options.child_name:
            logger.info('starting in child mode')
            # This is a roslaunch child, spin up client server.
            # client spins up an XML-RPC server that waits for
            # commands and configuration from the server.
            from roslaunch import child as roslaunch_child
            c = roslaunch_child.ROSLaunchChild(uuid, options.child_name, options.server_uri)
            c.run()
        else:
            logger.info('starting in server mode')
            # #1491 change terminal name
            if not options.disable_title:
                rlutil.change_terminal_name(args, options.core)
            # Read roslaunch string from stdin when - is passed as launch filename.
            roslaunch_strs = []
            if '-' in args:
                roslaunch_core.printlog(
                    "Passed '-' as file argument, attempting to read roslaunch XML from stdin."
                )
                roslaunch_strs.append(sys.stdin.read())
                roslaunch_core.printlog("... %d bytes read successfully.\n" % len(roslaunch_strs[-1]))
                args.remove('-')
            # This is a roslaunch parent, spin up parent server and launch processes.
            # args are the roslaunch files to load
            from roslaunch import parent as roslaunch_parent
            try:
                # force a port binding spec if we are running a core
                if options.core:
                    options.port = options.port or DEFAULT_MASTER_PORT
                p = roslaunch_parent.ROSLaunchParent(
                    uuid, args,
                    roslaunch_strs=roslaunch_strs,
                    is_core=options.core,
                    port=options.port,
                    local_only=options.local_only,
                    verbose=options.verbose,
                    force_screen=options.force_screen,
                    num_workers=options.num_workers,
                    timeout=options.timeout)
                p.start()
                if fn_to_call is None:
                    p.spin()
                else:
                    fn_to_call(real_args)
            finally:
                # remove the pid file
                if options.pid_fn:
                    try:
                        os.unlink(options.pid_fn)
                    except os.error:
                        pass
    except RLException as e:
        roslaunch_core.printerrlog(str(e))
        roslaunch_core.printerrlog(
            'The traceback for the exception was written to the log file')
        if logger:
            logger.error(traceback.format_exc())
        sys.exit(1)
    except ValueError as e:
        # TODO: need to trap better than this high-level trap
        roslaunch_core.printerrlog(str(e))
        roslaunch_core.printerrlog(
            'The traceback for the exception was written to the log file')
        if logger:
            logger.error(traceback.format_exc())
        sys.exit(1)
    except Exception as e:
        traceback.print_exc()
        sys.exit(1)