def wait():
    """
    Wait for a running container to exit.

    Reads a ``Wait`` message with ``recv_proto``, runs ``docker wait`` for the
    named container while holding that container's "wait" lock, and replies
    with a ``Termination`` message (via ``send_proto``) whose ``status`` is the
    integer parsed from the first line of the command's stdout.

    The process exits with status 1 if the docker command reports a positive
    return code.
    """
    request = recv_proto(Wait)
    container_id = request.container_id.value

    # Hold the per-container lock for the whole wait/report sequence
    with container_lock(container_id, "wait"):
        logger.info("Waiting for container %s", container_id)

        output, _, status = invoke_docker("wait", [container_id], stdout=PIPE)
        if status > 0:
            logger.error("Failed to wait for container, bad exit code (%d)",
                         status)
            exit(1)

        # The first line of the command's output is parsed as the exit code
        first_line = output.readline()
        exit_code = int(first_line.rstrip())
        logger.info("Container exit code: %d", exit_code)

        result = Termination()
        result.killed = False
        result.status = exit_code
        result.message = ""

        send_proto(result)
def destroy():
    """
    Kill and remove a container.

    Reads a ``Destroy`` message with ``recv_proto`` and, while holding the
    container's lock, runs ``docker kill`` followed by ``docker rm`` for it.
    The process exits with status 1 as soon as either command reports a
    positive return code, so ``rm`` is not attempted after a failed ``kill``.
    """
    request = recv_proto(Destroy)
    container_id = request.container_id.value

    # (docker sub-command, progress message, failure message), in run order
    steps = (
        ("kill",
         "Ensuring container %s is killed",
         "Failed to kill container, bad exit code (%d)"),
        ("rm",
         "Removing container %s",
         "Failed to remove container, bad exit code (%d)"),
    )

    # Hold the per-container lock across both commands
    with container_lock(container_id):
        for command, progress, failure in steps:
            logger.info(progress, container_id)

            _, _, status = invoke_docker(command, [container_id], stdout=PIPE)
            if status > 0:
                logger.error(failure, status)
                exit(1)
def destroy():
    """
    Kill and remove a container.

    Reads a ``Destroy`` message with ``recv_proto`` and hands its
    ``container_id`` to ``destroy_container`` while holding the container's
    lock. The process exits with status 1 when that helper returns a falsy
    result.
    """
    request = recv_proto(Destroy)

    # Hold the per-container lock while the container is torn down
    with container_lock(request.container_id.value):
        if not destroy_container(request.container_id):
            exit(1)
def update():
    """
    Update the resources of a running container.

    Reads an ``Update`` message with ``recv_proto`` and, while holding the
    container's "update" lock, applies the requested resources by writing
    cgroup metrics through ``write_metric``:

    * ``mem`` (taken as megabytes) always updates
      ``memory.soft_limit_in_bytes``; ``memory.limit_in_bytes`` is only ever
      raised, never lowered.
    * ``cpus`` is written to ``cpu.shares`` as ``cpus * 256``, the same
      scaling ``launch`` passes to ``docker run -c``.
    * ``ports`` cannot be changed on a running container and is only logged
      as an error.

    Raises:
        Exception: if the inspect output has neither an ``ID`` nor an ``Id``
            key.
    """
    request = recv_proto(Update)
    container_id = request.container_id.value

    with container_lock(container_id, "update"):
        logger.info("Updating resources for container %s", container_id)

        # Get the container ID; the key is spelled "ID" or "Id" depending on
        # the inspect output, so accept either.
        info = inspect_container(container_id)
        lxc_container_id = info.get("ID", info.get("Id"))
        if lxc_container_id is None:
            raise Exception("Failed to get full container ID")

        # Gather the resources
        max_mem = None
        cpus = None

        for resource in request.resources:
            if resource.name == "mem":
                max_mem = int(resource.scalar.value) * 1024 * 1024
            elif resource.name == "cpus":
                # Keep the fractional value; it is scaled to shares once,
                # below. Truncating here would turn e.g. 0.5 CPUs into 0.
                cpus = resource.scalar.value
            elif resource.name == "ports":
                logger.error(
                    "Unable to process an update to port configuration!")

        if max_mem:
            # Update the soft limit
            write_metric(lxc_container_id, "memory.soft_limit_in_bytes",
                         max_mem)

            # Figure out if we can update the hard limit.
            # If we reduce the hard limit and too much memory is in use, this
            # can invoke an OOM, so the hard limit is only ever raised.
            current_limit = int(
                read_metric(lxc_container_id, "memory.limit_in_bytes"))
            if max_mem > current_limit:
                write_metric(lxc_container_id, "memory.limit_in_bytes",
                             max_mem)
            elif max_mem < current_limit:
                logger.info("Skipping hard memory limit, would invoke OOM")

        if cpus:
            # One CPU corresponds to 256 shares
            shares = int(cpus * 256)
            write_metric(lxc_container_id, "cpu.shares", shares)

        logger.info("Finished processing container update")
def update():
    """
    Update the resources of a running container.

    Reads an ``Update`` message with ``recv_proto`` and, while holding the
    container's "update" lock, applies the requested resources by writing
    cgroup metrics through ``write_metric``:

    * ``mem`` (taken as megabytes) always updates
      ``memory.soft_limit_in_bytes``; ``memory.limit_in_bytes`` is only ever
      raised, never lowered.
    * ``cpus`` is written to ``cpu.shares`` as ``cpus * 256``, the same
      scaling ``launch`` passes to ``docker run -c``.
    * ``ports`` cannot be changed on a running container and is only logged
      as an error.

    Raises:
        Exception: if the inspect output has neither an ``ID`` nor an ``Id``
            key.
    """
    request = recv_proto(Update)
    container_id = request.container_id.value

    with container_lock(container_id, "update"):
        logger.info("Updating resources for container %s", container_id)

        # Get the container ID ("ID" or "Id" depending on the inspect output)
        info = inspect_container(container_id)
        lxc_container_id = info.get("ID", info.get("Id"))

        if lxc_container_id is None:
            raise Exception("Failed to get full container ID")

        # Gather the resources
        max_mem = None
        cpus = None

        for resource in request.resources:
            if resource.name == "mem":
                max_mem = int(resource.scalar.value) * 1024 * 1024
            elif resource.name == "cpus":
                # Keep the fractional value; it is scaled to shares once,
                # below. Truncating here would turn e.g. 0.5 CPUs into 0.
                cpus = resource.scalar.value
            elif resource.name == "ports":
                logger.error(
                    "Unable to process an update to port configuration!")

        if max_mem:
            # Update the soft limit
            write_metric(lxc_container_id, "memory.soft_limit_in_bytes",
                         max_mem)

            # Figure out if we can update the hard limit.
            # If we reduce the hard limit and too much memory is in use, this
            # can invoke an OOM, so the hard limit is only ever raised.
            current_limit = int(
                read_metric(lxc_container_id, "memory.limit_in_bytes"))
            if max_mem > current_limit:
                write_metric(lxc_container_id, "memory.limit_in_bytes",
                             max_mem)
            elif max_mem < current_limit:
                logger.info("Skipping hard memory limit, would invoke OOM")

        if cpus:
            # One CPU corresponds to 256 shares
            shares = int(cpus * 256)
            write_metric(lxc_container_id, "cpu.shares", shares)

        logger.info("Finished processing container update")
def launch():
    """
    Launch a new Mesos executor in a Docker container.

    Reads a ``Launch`` message with ``recv_proto`` and, while holding the
    container's lock, builds the ``docker run`` argument list with
    ``build_docker_args`` and invokes it. Any exception raised while building
    the arguments is logged and re-raised. The process exits with status 1 if
    ``docker run`` reports a positive return code.
    """
    request = recv_proto(Launch)
    container_id = request.container_id.value

    # Acquire a lock for this container
    with container_lock(container_id):
        logger.info("Preparing to launch container %s", container_id)

        try:
            run_arguments = build_docker_args(request)
        except Exception as e:
            # Log for the containerizer's own log, then let it propagate
            logger.error("Caught exception: %s", e)
            raise

        logger.info("Launching docker container")
        _, _, return_code = invoke_docker("run", run_arguments)

        if return_code > 0:
            logger.error("Failed to launch container")
            exit(1)
def launch():
    """
    Launch a new Mesos executor in a Docker container.

    Reads a ``Launch`` message with ``recv_proto`` and, while holding the
    container's lock:

    1. picks the executor command (the message's ``executor_info`` when set,
       otherwise ``mesos-executor`` from ``MESOS_LIBEXEC_DIRECTORY``) and
       fetches its URIs into the sandbox directory with ``fetch_uris``;
    2. builds the ``docker run`` arguments: name, user, environment, the
       native library and sandbox volumes, and the CPU shares, memory and
       ports summed over the task and executor resources;
    3. pulls the image, then runs the executor via ``sh -c`` with its output
       appended to ``stdout`` / ``stderr`` in the working directory.

    The process exits with status 1 when fetching, pulling or running reports
    a positive return code, or when no container image can be determined.

    Raises:
        KeyError: if ``MESOS_NATIVE_LIBRARY`` is unset, or if
            ``MESOS_LIBEXEC_DIRECTORY`` is unset when no executor is given.
    """
    request = recv_proto(Launch)
    container_id = request.container_id.value

    # Acquire a lock for this container
    with container_lock(container_id):
        logger.info("Preparing to launch container %s", container_id)

        # Build up the docker arguments, starting with the container ID
        arguments = ["--name", container_id]

        # Configure the docker network to share the hosts
        # NOTE(review): "--net=bridge" is also passed at the front of the final
        # argument list below, so both flags reach docker -- confirm which
        # network mode is actually intended.
        arguments.extend(["--net", "host"])

        # Configure the user
        if request.HasField("user"):
            arguments.extend(["-u", request.user])

        # Figure out where the executor is; the same command message also
        # supplies the URIs, the environment and the container image.
        if request.HasField("executor_info"):
            command = request.executor_info.command
            executor = command.value
        else:
            logger.info("No executor given, launching with mesos-executor")
            command = request.task_info.command
            executor = "%s/mesos-executor" % os.environ['MESOS_LIBEXEC_DIRECTORY']
        uris = command.uris

        # Environment variables
        for env in command.environment.variables:
            arguments.extend(["-e", "%s=%s" % (env.name, env.value)])

        # Download the URIs
        logger.info("Fetching URIs")
        if fetch_uris(request.directory, uris) > 0:
            logger.error("Mesos fetcher returned bad exit code")
            exit(1)

        # Link the mesos native library
        native_library = os.environ['MESOS_NATIVE_LIBRARY']
        arguments.extend(["-v", "%s:/usr/lib/%s" % (
            native_library, os.path.basename(native_library))])

        # Set the resource configuration. CPUs are accumulated as a float so
        # fractional requests (e.g. 0.5) are not truncated to zero before
        # being scaled to shares.
        cpus = 0.0
        max_memory = 0
        ports = set()

        # Grab the resources from the task and executor
        resource_sets = (request.task_info.resources,
                         request.executor_info.resources)
        for resources in resource_sets:
            for resource in resources:
                if resource.name == "cpus":
                    cpus += resource.scalar.value
                elif resource.name == "mem":
                    max_memory += int(resource.scalar.value)
                elif resource.name == "ports":
                    for port_range in resource.ranges.range:
                        # Ranges are inclusive at both ends
                        ports.update(
                            xrange(port_range.begin, port_range.end + 1))

        # One CPU corresponds to 256 shares
        cpu_shares = int(cpus * 256)
        if cpu_shares > 0:
            arguments.extend(["-c", str(cpu_shares)])
        if max_memory > 0:
            arguments.extend(["-m", "%dm" % max_memory])
        for port in ports:
            arguments.extend(["-p", ":%i" % port])

        logger.info("Configured with executor %s", executor)

        # Add the sandbox directory
        arguments.extend(["-v", "%s:/mesos-sandbox" % (request.directory)])
        arguments.extend(["-w", "/mesos-sandbox"])

        # Set the MESOS_DIRECTORY environment variable to the sandbox mount point
        arguments.extend(["-e", "MESOS_DIRECTORY=/mesos-sandbox"])

        # Pass through the rest of the mesos environment variables
        mesos_env = ["MESOS_FRAMEWORK_ID", "MESOS_EXECUTOR_ID",
                     "MESOS_SLAVE_ID", "MESOS_CHECKPOINT",
                     "MESOS_SLAVE_PID", "MESOS_RECOVERY_TIMEOUT",
                     "MESOS_NATIVE_LIBRARY"]
        for key in mesos_env:
            if key in os.environ:
                arguments.extend(["-e", "%s=%s" % (key, os.environ[key])])

        # Parse the container image and any extra docker options. Each option
        # string is split into separate argv entries; extend() keeps the
        # argument list flat (append() would nest a list inside it).
        image = command.container.image
        extra_args = []
        for option in command.container.options:
            extra_args.extend(option.split(" "))

        if not image:
            # .get() so an unset variable reaches the error below instead of
            # raising KeyError
            image = os.environ.get("MESOS_DEFAULT_CONTAINER_IMAGE")
        if not image:
            logger.error("No default container image")
            exit(1)

        # Drop the URL scheme, keeping "<netloc><path>" as the image name
        url = urlparse(image)
        image = ""
        if url.netloc:
            image = url.netloc
        image += url.path

        # Pull the image
        logger.info("Pulling latest docker image: %s", image)
        _, _, return_code = invoke_docker("pull", [image])
        if return_code > 0:
            logger.error("Failed to pull image (%d)", return_code)
            exit(1)

        # TODO(tarnfeld): Locking

        run_arguments = [
            "-d",  # Enable daemon mode
            "--net=bridge"  # Bridge the network with the host
        ]

        run_arguments.extend(arguments)
        run_arguments.extend(extra_args)
        run_arguments.extend(["-e", "GLOG_v=5"])
        run_arguments.append(image)
        run_arguments.extend(["sh", "-c"])
        run_arguments.append(executor + " >> stdout 2>>stderr")

        logger.info("Launching docker container")
        _, _, return_code = invoke_docker("run", run_arguments)

        if return_code > 0:
            logger.error("Failed to launch container")
            exit(1)
def launch():
    """
    Launch a new Mesos executor in a Docker container.

    Reads a ``Launch`` message with ``recv_proto`` and, while holding the
    container's lock:

    1. picks the executor command (the message's ``executor_info`` when set,
       otherwise ``mesos-executor`` from ``MESOS_LIBEXEC_DIRECTORY``) and
       fetches its URIs into the sandbox directory with ``fetch_uris``;
    2. builds the ``docker run`` arguments: name, user, environment, the
       native library and sandbox volumes, and the CPU shares, memory and
       port mappings taken from the task's resources;
    3. runs the executor via ``sh -c`` with its output appended to
       ``stdout`` / ``stderr`` in the working directory.

    The process exits with status 1 when fetching or running reports a
    positive return code, or when no container image can be determined.

    Raises:
        KeyError: if ``MESOS_NATIVE_LIBRARY`` is unset, or if
            ``MESOS_LIBEXEC_DIRECTORY`` is unset when no executor is given.
    """
    request = recv_proto(Launch)
    container_id = request.container_id.value

    # Acquire a lock for this container
    with container_lock(container_id):
        logger.info("Preparing to launch container %s", container_id)

        # Build up the docker arguments, starting with the container ID
        arguments = ["--name", container_id]

        # Configure the user
        if request.HasField("user"):
            arguments.extend(["-u", request.user])

        # Figure out where the executor is; the same command message also
        # supplies the URIs, the environment and the container image, so the
        # image is always looked up on the command that is actually run.
        if request.HasField("executor_info"):
            command = request.executor_info.command
            executor = command.value
        else:
            logger.info("No executor given, launching with mesos-executor")
            command = request.task_info.command
            executor = "%s/mesos-executor" % os.environ['MESOS_LIBEXEC_DIRECTORY']
        uris = command.uris

        # Environment variables
        for env in command.environment.variables:
            arguments.extend(["-e", "%s=%s" % (env.name, env.value)])

        # Download the URIs
        logger.info("Fetching URIs")
        if fetch_uris(request.directory, uris) > 0:
            logger.error("Mesos fetcher returned bad exit code")
            exit(1)

        # Link the mesos native library
        native_library = os.environ['MESOS_NATIVE_LIBRARY']
        arguments.extend(["-v", "%s:/usr/lib/%s" % (
            native_library, os.path.basename(native_library))])

        # Set the resource configuration
        for resource in request.task_info.resources:
            if resource.name == "cpus":
                # One CPU corresponds to 256 shares
                arguments.extend(["-c", str(int(resource.scalar.value * 256))])
            elif resource.name == "mem":
                arguments.extend(["-m", "%dm" % (int(resource.scalar.value))])
            elif resource.name == "ports":
                for port_range in resource.ranges.range:
                    # Ranges are inclusive at both ends
                    for port in xrange(port_range.begin, port_range.end + 1):
                        arguments.extend(["-p", "%i:%i" % (port, port)])

        logger.info("Configured with executor %s", executor)

        # Add the sandbox directory
        arguments.extend(["-v", "%s:/mesos-sandbox" % (request.directory)])
        arguments.extend(["-w", "/mesos-sandbox"])

        # Set the MESOS_DIRECTORY environment variable to the sandbox mount point
        arguments.extend(["-e", "MESOS_DIRECTORY=/mesos-sandbox"])

        # Pass through the rest of the mesos environment variables
        mesos_env = ["MESOS_FRAMEWORK_ID", "MESOS_EXECUTOR_ID",
                     "MESOS_SLAVE_ID", "MESOS_CHECKPOINT",
                     "MESOS_SLAVE_PID", "MESOS_RECOVERY_TIMEOUT"]
        for key in mesos_env:
            if key in os.environ:
                arguments.extend(["-e", "%s=%s" % (key, os.environ[key])])

        # Parse the container image and any extra docker options. Each option
        # string is split into separate argv entries; extend() keeps the
        # argument list flat (append() would nest a list inside it).
        image = command.container.image
        extra_args = []
        for option in command.container.options:
            extra_args.extend(option.split(" "))

        if not image:
            # .get() so an unset variable reaches the error below instead of
            # raising KeyError
            image = os.environ.get("MESOS_DEFAULT_CONTAINER_IMAGE")
        if not image:
            logger.error("No default container image")
            exit(1)

        # Drop the URL scheme, keeping "<netloc><path>" as the image name
        url = urlparse(image)
        image = ""
        if url.netloc:
            image = url.netloc
        image += url.path

        # TODO(tarnfeld): Locking

        run_arguments = [
            "-d",  # Enable daemon mode
            "--net=bridge"  # Bridge the network with the host
        ]

        run_arguments.extend(arguments)
        run_arguments.extend(extra_args)
        run_arguments.extend(["-e", "GLOG_v=5"])
        run_arguments.append(image)
        run_arguments.extend(["sh", "-c"])
        run_arguments.append(executor + " >> stdout 2>>stderr")

        logger.info("Launching docker container")
        _, _, return_code = invoke_docker("run", run_arguments)

        if return_code > 0:
            logger.error("Failed to launch container")
            exit(1)