def run_mesos_driver(stop_signal, config):
    """Run a Mesos executor driver until the executor's task has completed.

    A CookExecutor is built from ``stop_signal`` and ``config`` and handed to
    a MesosExecutorDriver, which is started right away. The call then blocks,
    polling the executor once per second, and stops the driver once the
    executor reports that its task has completed.

    ``stop_signal`` is not inspected in this function; it is only passed
    through to CookExecutor (presumably the executor reacts to it by marking
    its task as completed -- confirm against CookExecutor).
    """
    cook_executor = CookExecutor(stop_signal, config)
    mesos_driver = MesosExecutorDriver(cook_executor)
    mesos_driver.start()

    # Keep checking on the executor; leave the loop as soon as it reports
    # that its task is done.
    while True:
        if cook_executor.has_task_completed():
            break
        time.sleep(1)

    logging.info('Executor thread has completed')
    mesos_driver.stop()
def _ensure_agent_endpoint():
    """Return the Mesos agent endpoint, faking MESOS_AGENT_ENDPOINT if unset.

    When the variable is missing it is set from MESOS_SLAVE_ENDPOINT, or to
    ``127.0.0.1:5051`` if that is missing as well, and a warning is logged.
    """
    # `in` works on both Python 2 and Python 3; `has_key` exists only on 2.
    if "MESOS_AGENT_ENDPOINT" not in os.environ:
        # Some Mesos setups in our tests somehow lack this variable. Provide a
        # fake one to maybe convince the executor driver to work.
        os.environ["MESOS_AGENT_ENDPOINT"] = os.environ.get(
            "MESOS_SLAVE_ENDPOINT", "127.0.0.1:5051")
        log.warning("Had to fake MESOS_AGENT_ENDPOINT as %s",
                    os.environ["MESOS_AGENT_ENDPOINT"])
    return os.environ["MESOS_AGENT_ENDPOINT"]


def _log_agent_state(endpoint):
    """Ask the agent at ``endpoint`` for verbose logging and log its state.

    Toggling the agent log level is best-effort: any failure is logged and
    ignored. Fetching and parsing the agent state is not guarded, so an error
    there propagates to the caller.
    """
    try:
        urlopen("http://%s/logging/toggle?level=1&duration=15mins"
                % endpoint).read()
        log.debug("Toggled agent log level")
    except Exception:
        # Keep going, but record why the toggle failed.
        log.debug("Failed to toggle agent log level", exc_info=True)

    # Parse the agent state
    agent_state = json.loads(urlopen("http://%s/state" % endpoint).read())
    # Drop the completed frameworks, which grow over time
    agent_state.pop('completed_frameworks', None)
    log.debug("Agent state: %s", str(agent_state))


def _log_memory_limits():
    """Log virtual memory info, cgroup memory limits and memory rlimits."""
    log.debug("Virtual memory info in executor: %s",
              repr(psutil.virtual_memory()))

    if os.path.exists('/sys/fs/cgroup/memory'):
        # Mesos can limit memory with a cgroup, so we should report on that.
        for dirpath, _dirnames, filenames in os.walk('/sys/fs/cgroup/memory',
                                                     followlinks=True):
            for filename in filenames:
                if 'limit_in_bytes' not in filename:
                    continue
                limit_path = os.path.join(dirpath, filename)
                log.debug('cgroup memory info from %s:', limit_path)
                try:
                    # The context manager closes the file even if reading
                    # fails part-way through.
                    with open(limit_path) as limit_file:
                        for line in limit_file:
                            log.debug(line.rstrip())
                except Exception:
                    log.debug("Failed to read file", exc_info=True)

    # Mesos can also impose rlimit limits, including on things that really
    # ought to not be limited, like virtual address space size.
    for label, limit in (('DATA', resource.RLIMIT_DATA),
                         ('STACK', resource.RLIMIT_STACK),
                         ('RSS', resource.RLIMIT_RSS),
                         ('AS', resource.RLIMIT_AS)):
        log.debug('%s rlimit: %s', label, str(resource.getrlimit(limit)))


def main():
    """Entry point of the executor process.

    Configures DEBUG logging, logs diagnostics about the Mesos agent and the
    memory limits in effect, then runs a MesosExecutor under a
    MesosExecutorDriver until the driver is joined.

    Exits the process via ``sys.exit``: status 0 when ``driver.join()``
    returned ``None`` or ``'DRIVER_STOPPED'``, status 1 otherwise.
    """
    logging.basicConfig(level=logging.DEBUG)
    log.debug("Starting executor")

    _log_agent_state(_ensure_agent_endpoint())
    _log_memory_limits()

    executor = MesosExecutor()
    log.debug('Made executor')
    driver = MesosExecutorDriver(executor, use_addict=True)

    old_on_event = driver.on_event

    def patched_on_event(event):
        """
        Intercept and log all pymesos events.
        """
        log.debug("Event: %s", repr(event))
        old_on_event(event)

    driver.on_event = patched_on_event

    log.debug('Made driver')
    driver.start()
    log.debug('Started driver')
    driver_result = driver.join()
    log.debug('Joined driver')

    # Tolerate a None in addition to the code the docs suggest we should
    # receive from join()
    exit_value = 0 if (driver_result is None or driver_result == 'DRIVER_STOPPED') else 1
    # Sanity check: no task should still be registered as running once the
    # driver has been joined.
    assert len(executor.runningTasks) == 0
    sys.exit(exit_value)