def usage(self, *args):
    log.info(" ".join(args))
    message = recordio.read(Usage)
    container_id = message.container_id.value
    state = deimos.state.State(self.state_root, mesos_id=container_id)
    state.await_launch()
    state.ids()
    if state.cid() is None:
        log.info("Container not started?")
        return 0
    if state.exit() is not None:
        log.info("Container is stopped")
        return 0
    cg = deimos.cgroups.CGroups(**deimos.docker.cgroups(state.cid()))
    if len(cg.keys()) == 0:
        log.info("Container has no CGroups...already stopped?")
        return 0
    try:
        recordio.write(ResourceStatistics,
                       timestamp=time.time(),
                       mem_limit_bytes=cg.memory.limit(),
                       cpus_limit=cg.cpu.limit(),
                       # cpus_user_time_secs=cg.cpuacct.user_time(),
                       # cpus_system_time_secs=cg.cpuacct.system_time(),
                       mem_rss_bytes=cg.memory.rss())
    except AttributeError as e:
        log.error("Missing CGroup!")
        raise e
    return 0
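# The cg.memory.*() and cg.cpu.*() calls above ultimately read counters out
# of the cgroup filesystem. Below is a minimal sketch of those reads,
# assuming cgroup v1 mounted under /sys/fs/cgroup; the real deimos.cgroups
# module resolves the container's cgroup paths itself, and this helper's
# name and layout are illustrative only.
def _example_cgroup_stats(cgroup_path):
    def read_int(subsystem, filename):
        path = "/sys/fs/cgroup/%s/%s/%s" % (subsystem, cgroup_path, filename)
        with open(path) as f:
            return int(f.read().strip())

    def read_stat(subsystem, filename, field):
        path = "/sys/fs/cgroup/%s/%s/%s" % (subsystem, cgroup_path, filename)
        with open(path) as f:
            for line in f:
                key, _, value = line.partition(" ")
                if key == field:
                    return int(value)

    return {"mem_limit_bytes": read_int("memory", "memory.limit_in_bytes"),
            # By Mesos convention, shares = cpus * 1024
            "cpus_limit": read_int("cpu", "cpu.shares") / 1024.0,
            "mem_rss_bytes": read_stat("memory", "memory.stat", "rss")}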
def destroy(self, *args):
    log.info(" ".join(args))
    message = recordio.read(Destroy)
    container_id = message.container_id.value
    state = deimos.state.State(self.state_root, mesos_id=container_id)
    state.await_launch()
    lk_d = state.lock("destroy", LOCK_EX)
    if state.exit() is None:
        Run()(deimos.docker.stop(state.cid()))
    else:
        log.info("Container is stopped")
    return 0
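# Note that deimos.docker.stop() above does not invoke Docker itself: it
# returns an argv for Run() to execute. A hedged sketch of that style of
# helper (the real one may pass different flags, such as another stop
# timeout):
def _example_docker_stop(cid):
    return ["docker", "stop", "-t", "2", cid]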
def stdio(containerizer, *args):
    """Connect containerizer class to command line args and STDIN

    Dispatches to an appropriate containerizer method based on the first
    argument and parses the input using an appropriate Protobuf type.

        launch     < containerizer::Launch
        update     < containerizer::Update
        usage      < containerizer::Usage   > mesos::ResourceStatistics
        wait       < containerizer::Wait    > containerizer::Termination
        destroy    < containerizer::Destroy
        containers                          > containerizer::Containers
        recover

    Output serialization must be handled by the containerizer method (it
    doesn't necessarily happen at the end).

    Not really part of the containerizer protocol but exposed by Deimos as
    a subcommand:

        # Follows a Docker ID, PID, &c and exits with an appropriate,
        # matching exit code, in a manner specific to the containerizer
        observe <id>
    """
    try:
        name = args[0]
        method, proto = {"launch": (containerizer.launch, Launch),
                         "update": (containerizer.update, Update),
                         "usage": (containerizer.usage, Usage),
                         "wait": (containerizer.wait, Wait),
                         "destroy": (containerizer.destroy, Destroy),
                         "containers": (containerizer.containers, None),
                         "recover": (containerizer.recover, None),
                         "observe": (containerizer.observe, None)}[name]
    except IndexError:
        raise Err("Please choose a subcommand")
    except KeyError:
        raise Err("Subcommand %s is not valid for containerizers" % name)
    log.debug("%r", (method, proto))
    if proto is not None:
        return method(recordio.read(proto), *args[1:])
    else:
        return method(*args[1:])
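# A hedged sketch of the recordio helpers this dispatcher leans on,
# assuming one protobuf message per subcommand invocation: read STDIN to
# EOF and parse, serialize the reply to STDOUT. The real deimos.recordio
# module may frame, buffer, or flush differently; these names are
# illustrative only.
def _example_recordio_read(proto_cls):
    import sys
    message = proto_cls()
    message.ParseFromString(sys.stdin.read())
    return message

def _example_recordio_write(proto_cls, **fields):
    import sys
    message = proto_cls(**fields)
    sys.stdout.write(message.SerializeToString())
    sys.stdout.flush()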
def wait(self, *args):
    log.info(" ".join(args))
    observe = False
    # NB: The "@@observe-docker@@" variant is a work around for Mesos's
    #     option parser. There is a fix in the pipeline.
    if list(args[0:1]) in [["--observe-docker"], ["@@observe-docker@@"]]:
        # In Docker mode, we use Docker wait to wait for the container
        # and then exit with the returned exit code. The Docker CID is
        # passed on the command line.
        state = deimos.state.State(self.state_root, docker_id=args[1])
        observe = True
    else:
        message = recordio.read(Wait)
        container_id = message.container_id.value
        state = deimos.state.State(self.state_root, mesos_id=container_id)
    self.state = state
    deimos.sig.install(self.stop_docker_and_resume)
    state.await_launch()
    try:
        if not observe:
            state.lock("observe", LOCK_SH, seconds=None)
        state.lock("wait", LOCK_SH, seconds=None)
    except IOError as e:                       # Allows for signal recovery
        if e.errno != errno.EINTR:
            raise e
        if not observe:
            state.lock("observe", LOCK_SH, seconds=1)
        state.lock("wait", LOCK_SH, seconds=1)
    termination = (state.exit() if state.exit() is not None else 64) << 8
    recordio.write(Termination,
                   killed=False,
                   message="",
                   status=termination)
    if state.exit() is not None:
        return state.exit()
    raise Err("Wait lock is not held nor is exit file present")
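# The "<< 8" above packs the exit code the way POSIX wait(2) encodes a
# normal exit: the code sits in bits 8-15 and the termination signal (zero
# here) in the low byte, which is the shape this method writes into
# Termination.status. This helper is illustrative only, not part of Deimos:
def _example_pack_wait_status(exit_code):
    import os
    status = exit_code << 8
    assert os.WIFEXITED(status)                  # Low byte zero: normal exit
    assert os.WEXITSTATUS(status) == exit_code   # Inverts the packing
    return status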
def launch(self, *args):
    log.info(" ".join(args))
    fork = "--no-fork" not in args
    deimos.sig.install(self.log_signal)
    run_options = []
    proto = recordio.read(Launch)
    launchy = deimos.mesos.Launch(proto)
    state = deimos.state.State(self.state_root,
                               mesos_id=launchy.container_id)
    state.push()
    lk_l = state.lock("launch", LOCK_EX)
    state.executor_id = launchy.executor_id
    state.push()
    state.ids()
    mesos_directory()  # Redundant?
    if launchy.directory:
        os.chdir(launchy.directory)
    # TODO: if launchy.user:
    #           os.seteuid(launchy.user)
    url, options = self.container_settings.override(*launchy.container)
    pieces = re.split(r"^docker:///?", url)
    if len(pieces) != 2 or pieces[0] != "":
        raise Err("URL '%s' is not a valid docker:// URL!" % url)
    image = pieces[1]
    if image == "":
        image = self.default_image(launchy)
    log.info("image = %s", image)
    run_options += ["--sig-proxy"]
    run_options += ["--rm"]      # This is how we ensure container cleanup
    run_options += ["--cidfile", state.resolve("cid")]

    place_uris(launchy, self.shared_dir, self.optimistic_unpack)
    run_options += ["-w", self.workdir]

    # Docker requires an absolute path to a source filesystem, separated
    # from the bind path in the container with a colon, but the absolute
    # path to the Mesos sandbox might have colons in it (TaskIDs with
    # timestamps can cause this situation). So we create a soft link to it
    # and mount that.
    shared_full = os.path.abspath(self.shared_dir)
    sandbox_symlink = state.sandbox_symlink(shared_full)
    run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

    cpus, mems = launchy.cpu_and_mem
    env = launchy.env
    run_options += options

    # We need to wrap the call to Docker in a call to the Mesos executor
    # if no executor is passed as part of the task. We need to pass the
    # MESOS_* environment variables in to the container if we're going to
    # start an executor.
    observer_argv = None
    if launchy.needs_observer:
        # NB: The "@@docker@@" variant is a work around for Mesos's option
        #     parser. There is a fix in the pipeline.
        observer_argv = [mesos_executor(), "--override",
                         deimos.path.me(), "wait", "@@observe-docker@@"]
        state.lock("observe", LOCK_EX | LOCK_NB)
        ####### Explanation of Locks
        # When the observer is running, we would like its call to wait()
        # to finish before all others; and we'd like the observer to have
        # a chance to report TASK_FINISHED before the calls to wait()
        # report their results (which would result in a TASK_FAILED).
        #
        # For this reason, we take the "observe" lock in launch(), before
        # we call the observer and before releasing the "launch" or
        # "wait" locks.
        #
        # Calls to wait() in observer mode will actually skip locking
        # "observe"; but other wait calls must take this lock. The
        # "observe" lock is held by launch() until the observer executor
        # completes, at which point we can be reasonably sure its status
        # was propagated to the Mesos slave.
    else:
        env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

    runner_argv = deimos.docker.run(run_options, image, launchy.argv,
                                    env=env, ports=launchy.ports,
                                    cpus=cpus, mems=mems)

    log_mesos_env(logging.DEBUG)

    observer = None
    with open("stdout", "w") as o:          # This awkward multi 'with' is a
        with open("stderr", "w") as e:      # concession to 2.6 compatibility
            with open(os.devnull) as devnull:
                log.info(deimos.cmd.present(runner_argv))
                self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                               stdout=o, stderr=e)
                state.pid(self.runner.pid)
                state.await_cid()
                state.push()
                lk_w = state.lock("wait", LOCK_EX)
                lk_l.unlock()
                if fork:
                    pid = os.fork()
                    if pid != 0:
                        state.ids()
                        log.info("Forking watcher into child...")
                        return
                state.ids()
                if observer_argv is not None:
                    observer_argv += [state.cid()]
                    log.info(deimos.cmd.present(observer_argv))
                    call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                    # TODO: Collect these leaking file handles.
                    obs_out = open(state.resolve("observer.out"), "w+")
                    obs_err = open(state.resolve("observer.err"), "w+")
                    # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                    # is passed by the slave) there are problems when it
                    # attempts to bind. ("Address already in use").
                    # Purging both LIBPROCESS_* net variables, to be safe.
                    for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                        if v in os.environ:
                            del os.environ[v]
                    observer = subprocess.Popen(call, stdin=devnull,
                                                stdout=obs_out,
                                                stderr=obs_err,
                                                close_fds=True)
    data = Run(data=True)(deimos.docker.wait(state.cid()))
    state.exit(data)
    lk_w.unlock()
    for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
        if p is None:
            continue
        thread = threading.Thread(target=p.wait)
        thread.start()
        thread.join(10)
        if thread.is_alive():
            log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
            p.terminate()
        thread.join(1)
        if thread.is_alive():
            log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
            p.kill()
        msg = deimos.cmd.present(arr, p.wait())
        if p.wait() == 0:
            log.info(msg)
        else:
            log.warning(msg)
    return state.exit()
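# A minimal sketch of the file locking that the launch/observe/wait protocol
# above depends on, assuming state.lock() maps onto fcntl.flock over files
# in the container's state directory. The helper name and layout are
# illustrative; the real implementation lives in deimos.state.
def _example_flock(path, flags):
    import fcntl
    handle = open(path, "w+")
    fcntl.flock(handle, flags)     # Blocks unless fcntl.LOCK_NB is set
    return handle                  # Lock is held until unlocked or closed

# For instance, launch() holding "observe" exclusively while wait() blocks
# on a shared lock could look like:
#
#     held = _example_flock(state.resolve("observe"), LOCK_EX | LOCK_NB)
#     ...                      # observer runs, reports its terminal status
#     fcntl.flock(held, fcntl.LOCK_UN)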