Example #1
0
 def remove(self, *args, **kwargs):
     errors = 0
     lk = deimos.flock.LK(self.lock, LOCK_EX|LOCK_NB) 
     try:
         lk.lock()
     except deimos.flock.Err:
         msg = "Lock unavailable -- is cleanup already running?"
         if self.optimistic:
             log.info(msg)
             return 0
         else:
             log.error(msg)
             raise e
     try:
         for d in self.dirs(*args, **kwargs):
             state = deimos.state.state(d)
             if state is None:
                 log.warning("Not able to load state from: %s", d)
                 continue
             try:
                 cmd  = ["rm", "-rf", d + "/"]
                 cmd += [state._mesos()]
                 if state.cid() is not None:
                     cmd += [state._docker()]
                 Run()(cmd)
             except subprocess.CalledProcessError:
                 errors += 1
     finally:
         lk.unlock()
     if errors != 0:
         log.error("There were failures on %d directories", errors)
         return 4
Example #2
0
 def usage(self, usage_pb, *args):
     log.info(" ".join(args))
     container_id = usage_pb.container_id.value
     state = deimos.state.State(self.state_root, mesos_id=container_id)
     state.await_launch()
     state.ids()
     if state.cid() is None:
         log.info("Container not started?")
         return 0
     if state.exit() is not None:
         log.info("Container is stopped")
         return 0
     cg = deimos.cgroups.CGroups(**deimos.docker.cgroups(state.cid()))
     if len(cg.keys()) == 0:
         log.info("Container has no CGroups...already stopped?")
         return 0
     try:
         recordio.write(ResourceStatistics,
                        timestamp=time.time(),
                        mem_limit_bytes=cg.memory.limit(),
                        cpus_limit=cg.cpu.limit(),
                        # cpus_user_time_secs   = cg.cpuacct.user_time(),
                        # cpus_system_time_secs = cg.cpuacct.system_time(),
                        mem_rss_bytes=cg.memory.rss())
     except AttributeError as e:
         log.error("Missing CGroup!")
         raise e
     return 0
Example #3
0
 def usage(self, usage_pb, *args):
     log.info(" ".join(args))
     container_id = usage_pb.container_id.value
     state = deimos.state.State(self.state_root, mesos_id=container_id)
     state.await_launch()
     state.ids()
     if state.cid() is None:
         log.info("Container not started?")
         return 0
     if state.exit() is not None:
         log.info("Container is stopped")
         return 0
     cg = deimos.cgroups.CGroups(**deimos.docker.cgroups(state.cid()))
     if len(cg.keys()) == 0:
         log.info("Container has no CGroups...already stopped?")
         return 0
     try:
         recordio.write(
             ResourceStatistics,
             timestamp=time.time(),
             mem_limit_bytes=cg.memory.limit(),
             cpus_limit=cg.cpu.limit(),
             # cpus_user_time_secs   = cg.cpuacct.user_time(),
             # cpus_system_time_secs = cg.cpuacct.system_time(),
             mem_rss_bytes=cg.memory.rss())
     except AttributeError as e:
         log.error("Missing CGroup!")
         raise e
     return 0
Example #4
0
 def remove(self, *args, **kwargs):
     errors = 0
     lk = deimos.flock.LK(self.lock, LOCK_EX | LOCK_NB)
     try:
         lk.lock()
     except deimos.flock.Err:
         msg = "Lock unavailable -- is cleanup already running?"
         if self.optimistic:
             log.info(msg)
             return 0
         else:
             log.error(msg)
             raise e
     try:
         for d in self.dirs(*args, **kwargs):
             state = deimos.state.state(d)
             if state is None:
                 log.warning("Not able to load state from: %s", d)
                 continue
             try:
                 cmd = ["rm", "-rf", d + "/"]
                 cmd += [state._mesos()]
                 if state.cid() is not None:
                     cmd += [state._docker()]
                 Run()(cmd)
             except subprocess.CalledProcessError:
                 errors += 1
     finally:
         lk.unlock()
     if errors != 0:
         log.error("There were failures on %d directories", errors)
         return 4
Example #5
0
def read_wait_code(data):
    try:
        code = int(data)
        code = 128 + abs(code) if code < 0 else code
        return code % 256
    except:
        log.error("Result of `docker wait` wasn't an int: %r", data)
        return 111
Example #6
0
 def _docker(self, path=None, mkdir=False):
     if path is None:
         p = os.path.join(self.root, "docker", self.docker_id)
     else:
         p = os.path.join(self.root, "docker", self.docker_id, path)
     p = os.path.abspath(p)
     if mkdir:
         docker = os.path.join(self.root, "docker", self.docker_id)
         if not os.path.exists(docker):
             log.error("No Docker symlink (this should be impossible)")
             raise Err("Bad Docker symlink state")
         create(os.path.dirname(p))
     return p
Example #7
0
 def _docker(self, path=None, mkdir=False):
     if path is None:
         p = os.path.join(self.root, "docker", self.docker_id)
     else:
         p = os.path.join(self.root, "docker", self.docker_id, path)
     p = os.path.abspath(p)
     if mkdir:
         docker = os.path.join(self.root, "docker", self.docker_id)
         if not os.path.exists(docker):
             log.error("No Docker symlink (this should be impossible)")
             raise Err("Bad Docker symlink state")
         create(os.path.dirname(p))
     return p
Example #8
0
 def lock(self, name, flags, seconds=60):
     fmt_time = "indefinite" if seconds is None else "%ds" % seconds
     fmt_flags = deimos.flock.format_lock_flags(flags)
     flags, seconds = deimos.flock.nb_seconds(flags, seconds)
     log.info("request // %s %s (%s)", name, fmt_flags, fmt_time)
     p = self.resolve(os.path.join("lock", name), mkdir=True)
     lk = deimos.flock.LK(p, flags, seconds)
     try:
         lk.lock()
     except deimos.flock.Err:
         log.error("failure // %s %s (%s)", name, fmt_flags, fmt_time)
         raise
     if (flags & LOCK_EX) != 0:
         lk.handle.write(iso() + "\n")
     log.info("success // %s %s (%s)", name, fmt_flags, fmt_time)
     return lk
Example #9
0
 def lock(self, name, flags, seconds=60):
     fmt_time = "indefinite" if seconds is None else "%ds" % seconds
     fmt_flags = deimos.flock.format_lock_flags(flags)
     flags, seconds = deimos.flock.nb_seconds(flags, seconds)
     log.info("request // %s %s (%s)", name, fmt_flags, fmt_time)
     p = self.resolve(os.path.join("lock", name), mkdir=True)
     lk = deimos.flock.LK(p, flags, seconds)
     try:
         lk.lock()
     except deimos.flock.Err:
         log.error("failure // %s %s (%s)", name, fmt_flags, fmt_time)
         raise
     if (flags & LOCK_EX) != 0:
         lk.handle.write(iso() + "\n")
     log.info("success // %s %s (%s)", name, fmt_flags, fmt_time)
     return lk
Example #10
0
 def stop_docker_and_resume(self, signum):
     if self.state is not None and self.state.cid() is not None:
         cid = self.state.cid()
         log.info("Trying to stop Docker container: %s", cid)
         container_id = self.state.docker_id
         log.debug("Unwiring the container %s from MidoNet", container_id)
         try:
             log.info("state = %s", dir(self.state))
             midonet.unwire_container_from_midonet(container_id)
             log.debug("Successfully unwired the container %s from MidoNet "\
                       "bridge", container_id)
         except Exception as ex:
             log.error(traceback.format_exc())
         try:
             Run()(deimos.docker.stop(cid))
         except subprocess.CalledProcessError:
             pass
         return deimos.sig.Resume()
Example #11
0
 def stop_docker_and_resume(self, signum):
     if self.state is not None and self.state.cid() is not None:
         cid = self.state.cid()
         log.info("Trying to stop Docker container: %s", cid)
         container_id = self.state.docker_id
         log.debug("Unwiring the container %s from MidoNet", container_id)
         try:
             log.info("state = %s", dir(self.state))
             midonet.unwire_container_from_midonet(container_id)
             log.debug("Successfully unwired the container %s from MidoNet "\
                       "bridge", container_id)
         except Exception as ex:
             log.error(traceback.format_exc())
         try:
             Run()(deimos.docker.stop(cid))
         except subprocess.CalledProcessError:
             pass
         return deimos.sig.Resume()
Example #12
0
 def destroy(self, destroy_pb, *args):
     log.info(" ".join(args))
     container_id = destroy_pb.container_id.value
     state = deimos.state.State(self.state_root, mesos_id=container_id)
     state.await_launch()
     lk_d = state.lock("destroy", LOCK_EX)
     if state.exit() is None:
         container_id = state.docker_id
         log.debug("Unwiring the container %s from MidoNet", container_id)
         try:
             log.info("state_root = %s", dir(state))
             midonet.unwire_container_from_midonet(container_id)
             log.debug("Successfully unwired the container %s from MidoNet " \
                       "bridge", container_id)
         except Exception as ex:
             log.error(traceback.format_exc())
         Run()(deimos.docker.stop(state.cid()))
     else:
         log.info("Container is stopped")
     return 0
Example #13
0
 def destroy(self, destroy_pb, *args):
     log.info(" ".join(args))
     container_id = destroy_pb.container_id.value
     state = deimos.state.State(self.state_root, mesos_id=container_id)
     state.await_launch()
     lk_d = state.lock("destroy", LOCK_EX)
     if state.exit() is None:
         container_id = state.docker_id
         log.debug("Unwiring the container %s from MidoNet", container_id)
         try:
             log.info("state_root = %s", dir(state))
             midonet.unwire_container_from_midonet(container_id)
             log.debug("Successfully unwired the container %s from MidoNet " \
                       "bridge", container_id)
         except Exception as ex:
             log.error(traceback.format_exc())
         Run()(deimos.docker.stop(state.cid()))
     else:
         log.info("Container is stopped")
     return 0
Example #14
0
def cli(argv=None):
    deimos.sig.install(lambda _: None)
    if argv is None:
        argv = sys.argv
    sub = argv[1] if len(argv) > 1 else None

    if sub in ["-h", "--help", "help"]:
        print format_help()
        return 0

    conf = deimos.config.load_configuration()

    if sub == "config":
        log.info("Final configuration:")
        for _, conf in conf.items():
            print "%r" % conf
        return 0

    if sub == "locks":
        deimos.flock.lock_browser(os.path.join(conf.state.root, "mesos"))
        return 0

    if sub == "state":
        cleanup = deimos.cleanup.Cleanup(conf.state.root)
        t, rm = time.time(), False
        for arg in argv[2:]:
            if arg == "--rm":
                rm = True
                continue
            t = calendar.timegm(time.strptime(arg, "%Y-%m-%dT%H:%M:%SZ"))
        if rm:
            return cleanup.remove(t)
        else:
            for d in cleanup.dirs(t):
                sys.stdout.write(d + "\n")
            return 0

    if sub not in deimos.containerizer.methods():
        print >>sys.stderr, format_help()
        print >>sys.stderr, "** Please specify a subcommand **".center(79)
        log.error("Bad ARGV: %r" % argv[1:])
        return 1

    deimos.docker.options = conf.docker.argv()
    containerizer = deimos.containerizer.docker.Docker(
        container_settings=conf.containers,
        index_settings=conf.index,
        optimistic_unpack=conf.uris.unpack,
        state_root=conf.state.root
    )

    deimos.usage.report()
    try:
        result = deimos.containerizer.stdio(containerizer, *argv[1:])
        deimos.usage.report()
        if result is not None:
            if isinstance(result, bool):
                return 0 if result else 1
            if isinstance(result, int):
                return result
            if isinstance(result, str):
                sys.stdout.write(result)
            else:
                for item in result:
                    sys.stdout.write(str(item) + "\n")
    except Err as e:
        log.error("%s.%s: %s", type(e).__module__, type(e).__name__, str(e))
        return 4
    except subprocess.CalledProcessError as e:
        log.error(str(e))
        return 4
    except Exception:
        log.exception("Unhandled failure in %s", sub)
        return 8
    return 0
Example #15
0
    def launch(self, launch_pb, *args):
        log.info(" ".join(args))
        fork = False if "--no-fork" in args else True
        deimos.sig.install(self.log_signal)
        run_options = []
        launchy = deimos.mesos.Launch(launch_pb)
        state = deimos.state.State(self.state_root,
                                   mesos_id=launchy.container_id)
        state.push()
        lk_l = state.lock("launch", LOCK_EX)
        state.executor_id = launchy.executor_id
        state.push()
        state.ids()
        mesos_directory()  # Redundant?
        if launchy.directory:
            os.chdir(launchy.directory)
        # TODO: if launchy.user:
        #           os.seteuid(launchy.user)
        url, options = launchy.container
        options, trailing_argv = split_on(options, "//")
        url, options = self.container_settings.override(url, options)

        true_argv = launchy.argv if trailing_argv is None else trailing_argv

        image = self.determine_image(url, launchy)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]  # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(launchy, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = launchy.cpu_and_mem
        env = launchy.env
        run_options += options

        env_dict = dict(env)
        if env_dict.get("MIDONET_BRIDGE_ID", None):
            run_options += ["--net=none"]

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if launchy.needs_observer:
            # NB: The "@@docker@@" variant is a work around for Mesos's option
            # parser. There is a fix in the pipeline.
            observer_argv = [
                mesos_executor(), "--override",
                deimos.path.me(), "observe", state.mesos_id
            ]
            state.lock("observe", LOCK_EX | LOCK_NB)  # Explanation of Locks
            # When the observer is running, we would like its call to
            # observe() to finish before all the wait(); and we'd like the
            # observer to have a chance to report TASK_FINISHED before the
            # calls to wait() report their results (which would result in a
            # TASK_FAILED).
            #
            # For this reason, we take the "observe" lock in launch(), before
            # we call the observer and before releasing the "launch" or "wait"
            # locks.
            #
            # Calls to observe() actually skip locking "observe"; but wait()
            # calls must take this lock. The "observe" lock is held by
            # launch() until the observer executor completes, at which point
            # we can be reasonably sure its status was propagated to the Mesos
            # slave.
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        self.place_dockercfg()

        runner_argv = deimos.docker.run(run_options,
                                        image,
                                        true_argv,
                                        env=env,
                                        ports=launchy.ports,
                                        cpus=cpus,
                                        mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:  # This awkward multi 'with' is a
            with open("stderr", "w") as e:  # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))
                    self.runner = subprocess.Popen(runner_argv,
                                                   stdin=devnull,
                                                   stdout=o,
                                                   stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    log.debug("Wiring the container to MidoNet")
                    try:
                        bridge_id = env_dict.get(
                            "MIDONET_BRIDGE_ID",
                            "78488c47-d1de-4d16-a27a-4e6419dc4f88")
                        container_id = state.docker_id
                        ip_addr = env_dict.get("MIDONET_IP_ADDRESS",
                                               "192.168.100.42")
                        default_gw = env_dict.get("MIDONET_DEFAULT_GATEWAY",
                                                  None)
                        midonet.wire_container_to_midonet(
                            container_id, bridge_id, ip_addr, default_gw)
                        log.debug("Successfully wired the container %s to MidoNet " \
                                  "bridge %s", container_id, bridge_id)
                    except Exception as ex:
                        log.error(traceback.format_exc())

                    state.push()
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    if fork:
                        pid = os.fork()
                        if pid is not 0:
                            state.ids()
                            log.info("Forking watcher into child...")
                            return
                    state.ids()
                    if observer_argv is not None:
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                        # TODO: Collect these leaking file handles.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        obs_err = open(state.resolve("observer.err"), "w+")
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        observer = subprocess.Popen(call,
                                                    stdin=devnull,
                                                    stdout=obs_out,
                                                    stderr=obs_err,
                                                    close_fds=True)
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None:
                continue
            thread = threading.Thread(target=p.wait)
            thread.start()
            thread.join(10)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
                p.terminate()
            thread.join(1)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
                p.kill()
            msg = deimos.cmd.present(arr, p.wait())
            if p.wait() == 0:
                log.info(msg)
            else:
                log.warning(msg)
        return state.exit()
Example #16
0
    def launch(self, launch_pb, *args):
        log.info(" ".join(args))
        fork = False if "--no-fork" in args else True
        deimos.sig.install(self.log_signal)
        run_options = []
        launchy = deimos.mesos.Launch(launch_pb)
        state = deimos.state.State(self.state_root,
                                   mesos_id=launchy.container_id)
        state.push()
        lk_l = state.lock("launch", LOCK_EX)
        state.executor_id = launchy.executor_id
        state.push()
        state.ids()
        mesos_directory()  # Redundant?
        if launchy.directory:
            os.chdir(launchy.directory)
        # TODO: if launchy.user:
        #           os.seteuid(launchy.user)
        url, options = launchy.container
        options, trailing_argv = split_on(options, "//")
        url, options = self.container_settings.override(url, options)

        true_argv = launchy.argv if trailing_argv is None else trailing_argv

        image = self.determine_image(url, launchy)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]       # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(launchy, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = launchy.cpu_and_mem
        env = launchy.env
        run_options += options

        env_dict = dict(env)
        if env_dict.get("MIDONET_BRIDGE_ID", None):
            run_options += ["--net=none"]

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if launchy.needs_observer:
            # NB: The "@@docker@@" variant is a work around for Mesos's option
            # parser. There is a fix in the pipeline.
            observer_argv = [mesos_executor(), "--override",
                             deimos.path.me(), "observe", state.mesos_id]
            state.lock("observe", LOCK_EX | LOCK_NB)     # Explanation of Locks
            # When the observer is running, we would like its call to
            # observe() to finish before all the wait(); and we'd like the
            # observer to have a chance to report TASK_FINISHED before the
            # calls to wait() report their results (which would result in a
            # TASK_FAILED).
            #
            # For this reason, we take the "observe" lock in launch(), before
            # we call the observer and before releasing the "launch" or "wait"
            # locks.
            #
            # Calls to observe() actually skip locking "observe"; but wait()
            # calls must take this lock. The "observe" lock is held by
            # launch() until the observer executor completes, at which point
            # we can be reasonably sure its status was propagated to the Mesos
            # slave.
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        self.place_dockercfg()

        runner_argv = deimos.docker.run(run_options, image, true_argv,
                                        env=env, ports=launchy.ports,
                                        cpus=cpus, mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:        # This awkward multi 'with' is a
            with open("stderr", "w") as e:    # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))
                    self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                                                stdout=o,
                                                                stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    log.debug("Wiring the container to MidoNet")
                    try:
                        bridge_id = env_dict.get(
                            "MIDONET_BRIDGE_ID",
                            "78488c47-d1de-4d16-a27a-4e6419dc4f88")
                        container_id = state.docker_id
                        ip_addr = env_dict.get(
                            "MIDONET_IP_ADDRESS",
                            "192.168.100.42")
                        default_gw = env_dict.get("MIDONET_DEFAULT_GATEWAY", None)
                        midonet.wire_container_to_midonet(
                            container_id, bridge_id, ip_addr, default_gw)
                        log.debug("Successfully wired the container %s to MidoNet " \
                                  "bridge %s", container_id, bridge_id)
                    except Exception as ex:
                        log.error(traceback.format_exc())

                    state.push()
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    if fork:
                        pid = os.fork()
                        if pid is not 0:
                            state.ids()
                            log.info("Forking watcher into child...")
                            return
                    state.ids()
                    if observer_argv is not None:
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                        # TODO: Collect these leaking file handles.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        obs_err = open(state.resolve("observer.err"), "w+")
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        observer = subprocess.Popen(call, stdin=devnull,
                                                          stdout=obs_out,
                                                          stderr=obs_err,
                                                          close_fds=True)
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None:
                continue
            thread = threading.Thread(target=p.wait)
            thread.start()
            thread.join(10)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
                p.terminate()
            thread.join(1)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
                p.kill()
            msg = deimos.cmd.present(arr, p.wait())
            if p.wait() == 0:
                log.info(msg)
            else:
                log.warning(msg)
        return state.exit()