Ejemplo n.º 1
0
def wait():
    """
    Block until a running container exits, then report how it terminated.

    Reads a ``Wait`` message via ``recv_proto``, runs ``docker wait`` for the
    container while holding its "wait" lock, and replies with a
    ``Termination`` message whose status is parsed from the first line of
    the command's stdout. The process exits with status 1 if ``docker wait``
    itself reports a positive return code.
    """

    request = recv_proto(Wait)
    container_id = request.container_id.value

    # Hold the per-container lock for the whole wait/report sequence
    with container_lock(container_id, "wait"):

        logger.info("Waiting for container %s", container_id)

        output, _, docker_rc = invoke_docker("wait", [container_id],
                                             stdout=PIPE)
        if docker_rc > 0:
            logger.error("Failed to wait for container, bad exit code (%d)",
                         docker_rc)
            exit(1)

        # The first line of output is parsed as the container's exit code
        status = int(output.readline().rstrip())
        logger.info("Container exit code: %d", status)

        result = Termination()
        result.killed = False
        result.status = status
        result.message = ""

        send_proto(result)
def destroy():
    """
    Kill a container and then remove it.

    Reads a ``Destroy`` message via ``recv_proto`` and, while holding the
    container's lock, runs ``docker kill`` followed by ``docker rm``. If
    either command reports a positive return code the process exits with
    status 1, so a failed kill means the remove is never attempted.
    """

    request = recv_proto(Destroy)
    container_id = request.container_id.value

    # (docker sub-command, progress message, failure message), in run order
    steps = (
        ("kill",
         "Ensuring container %s is killed",
         "Failed to kill container, bad exit code (%d)"),
        ("rm",
         "Removing container %s",
         "Failed to remove container, bad exit code (%d)"),
    )

    # Hold the per-container lock across both docker invocations
    with container_lock(container_id):
        for command, progress, failure in steps:
            logger.info(progress, container_id)

            _out, _, status = invoke_docker(command, [container_id],
                                            stdout=PIPE)
            if status > 0:
                logger.error(failure, status)
                exit(1)
Ejemplo n.º 3
0
def usage():
    """
    Report resource usage statistics for a running container.

    Reads a ``Usage`` message via ``recv_proto``, resolves the container's
    full ID from ``inspect_container`` (key "ID", falling back to "Id"),
    fills a timestamped ``ResourceStatistics`` message through
    ``collect_container_stats`` and sends it back with ``send_proto``.
    The process exits with status 1 if the clock tick rate is not positive.

    Raises:
        Exception: if no full container ID can be found in the inspect data.
    """

    request = recv_proto(Usage)
    logger.info("Retrieving usage for container %s",
                request.container_id.value)

    # Resolve the full (LXC) container ID; the key is tried in both spellings
    details = inspect_container(request.container_id.value)
    fallback_id = details.get("Id")
    full_id = details.get("ID", fallback_id)

    if full_id is None:
        raise Exception("Failed to get full container ID")

    logger.info("Using LXC container ID %s", full_id)

    statistics = ResourceStatistics()
    statistics.timestamp = int(time.time())

    # Clock ticks per second, handed to the stats collector
    clock_ticks = os.sysconf("SC_CLK_TCK")
    if clock_ticks <= 0:
        logger.error("Unable to retrieve number of CPU clock ticks")
        exit(1)

    collect_container_stats(full_id, statistics, clock_ticks)

    logger.debug("Container usage: %s", statistics)

    # Hand the populated statistics back to mesos
    send_proto(statistics)
def destroy():
    """
    Kill and remove a container, exiting non-zero on failure.

    Reads a ``Destroy`` message via ``recv_proto`` and calls
    ``destroy_container`` while holding the container's lock. If that call
    returns a falsy value the process exits with status 1; the exit happens
    after the lock has been released.
    """

    request = recv_proto(Destroy)

    # Only the destroy call itself runs under the per-container lock
    with container_lock(request.container_id.value):
        destroyed = destroy_container(request.container_id)

    if destroyed:
        return

    exit(1)
Ejemplo n.º 5
0
def update():
    """
    Update the resources of a running container.

    Reads an ``Update`` message via ``recv_proto`` and, while holding the
    container's "update" lock, applies the requested "mem" and "cpus"
    resources to the container through ``write_metric``. A "ports" resource
    is not applied; it is only logged as an error.

    Memory: the requested value is multiplied by 1024 * 1024 and always
    written as the soft limit; the hard limit is only ever raised, never
    lowered (see the inline comment).
    CPU: ``cpu.shares`` is set to the requested CPU count multiplied by 256.

    Raises:
        Exception: if the inspect data contains neither an "ID" nor an
            "Id" key.
    """

    update = recv_proto(Update)

    with container_lock(update.container_id.value, "update"):

        logger.info("Updating resources for container %s",
                    update.container_id.value)

        # Get the container ID; the key is looked up under both spellings
        info = inspect_container(update.container_id.value)
        lxc_container_id = info.get("ID", info.get("Id"))

        if lxc_container_id is None:
            raise Exception("Failed to get full container ID")

        # Gather the resources
        max_mem = None
        cpu_shares = None

        for resource in update.resources:
            if resource.name == "mem":
                max_mem = int(resource.scalar.value) * 1024 * 1024
            elif resource.name == "cpus":
                # Scale before truncating so fractional CPUs (e.g. 0.5) are
                # honoured, and scale exactly once.
                cpu_shares = int(resource.scalar.value * 256)
            elif resource.name == "ports":
                logger.error(
                    "Unable to process an update to port configuration!")

        if max_mem:
            # Update the soft limit
            write_metric(lxc_container_id, "memory.soft_limit_in_bytes",
                         max_mem)

            # Figure out if we can update the hard limit
            # If we reduce the hard limit and too much memory is in use, this
            # can invoke an OOM. Raising it is always safe, so the hard limit
            # is only written when the new value is larger than the current.
            current_limit = int(
                read_metric(lxc_container_id, "memory.limit_in_bytes"))
            if max_mem > current_limit:
                write_metric(lxc_container_id, "memory.limit_in_bytes",
                             max_mem)
            else:
                logger.info("Skipping hard memory limit, would invoke OOM")

        if cpu_shares:
            write_metric(lxc_container_id, "cpu.shares", cpu_shares)

        logger.info("Finished processing container update")
Ejemplo n.º 6
0
def update():
    """
    Update the resources of a running container.

    Reads an ``Update`` message via ``recv_proto`` and, while holding the
    container's "update" lock, applies the requested "mem" and "cpus"
    resources to the container through ``write_metric``. A "ports" resource
    is not applied; it is only logged as an error.

    Memory: the requested value is multiplied by 1024 * 1024 and always
    written as the soft limit; the hard limit is only ever raised, never
    lowered (see the inline comment).
    CPU: ``cpu.shares`` is set to the requested CPU count multiplied by 256.

    Raises:
        Exception: if the inspect data contains neither an "ID" nor an
            "Id" key.
    """

    update = recv_proto(Update)

    with container_lock(update.container_id.value, "update"):

        logger.info("Updating resources for container %s", update.container_id.value)

        # Get the container ID; the key is looked up under both spellings
        info = inspect_container(update.container_id.value)
        lxc_container_id = info.get("ID", info.get("Id"))

        if lxc_container_id is None:
            raise Exception("Failed to get full container ID")

        # Gather the resources
        max_mem = None
        cpu_shares = None

        for resource in update.resources:
            if resource.name == "mem":
                max_mem = int(resource.scalar.value) * 1024 * 1024
            elif resource.name == "cpus":
                # Scale before truncating so fractional CPUs (e.g. 0.5) are
                # honoured, and scale exactly once.
                cpu_shares = int(resource.scalar.value * 256)
            elif resource.name == "ports":
                logger.error("Unable to process an update to port configuration!")

        if max_mem:
            # Update the soft limit
            write_metric(lxc_container_id, "memory.soft_limit_in_bytes", max_mem)

            # Figure out if we can update the hard limit
            # If we reduce the hard limit and too much memory is in use, this
            # can invoke an OOM. Raising it is always safe, so the hard limit
            # is only written when the new value is larger than the current.
            current_limit = int(read_metric(lxc_container_id, "memory.limit_in_bytes"))
            if max_mem > current_limit:
                write_metric(lxc_container_id, "memory.limit_in_bytes", max_mem)
            else:
                logger.info("Skipping hard memory limit, would invoke OOM")

        if cpu_shares:
            write_metric(lxc_container_id, "cpu.shares", cpu_shares)

        logger.info("Finished processing container update")
def launch():
    """
    Launch a new Mesos executor in a Docker container.

    Reads a ``Launch`` message via ``recv_proto`` and, while holding the
    container's lock, builds the ``docker run`` arguments with
    ``build_docker_args`` and starts the container. Any exception raised
    while building the arguments is logged and re-raised unchanged; a
    positive return code from ``docker run`` terminates the process with
    exit status 1.
    """

    launch = recv_proto(Launch)

    # Acquire a lock for this container
    with container_lock(launch.container_id.value):

        logger.info("Preparing to launch container %s", launch.container_id.value)

        try:
            run_arguments = build_docker_args(launch)
        except Exception as e:  # "as" form parses on Python 2.6+ and 3
            logger.error("Caught exception: %s", e)
            raise  # Re-raise the exception

        logger.info("Launching docker container")
        _, _, return_code = invoke_docker("run", run_arguments)

        if return_code > 0:
            logger.error("Failed to launch container")
            exit(1)
Ejemplo n.º 8
0
def launch():
    """
    Launch a new Mesos executor in a Docker container.

    Reads a ``Launch`` message via ``recv_proto`` and, while holding the
    container's lock:

    1. builds the ``docker run`` options (name, network, user, environment,
       volumes, resource limits and published ports),
    2. fetches the command's URIs into the sandbox with ``fetch_uris``,
    3. resolves and pulls the container image, and
    4. starts the container detached, running the executor through
       ``sh -c`` with its output appended to ``stdout``/``stderr``.

    The executor's command is used when the message carries
    ``executor_info``; otherwise the task's command is used together with
    ``mesos-executor``.

    The process exits with status 1 if fetching, pulling or running fails,
    or if no container image can be determined.

    Raises:
        KeyError: if ``MESOS_NATIVE_LIBRARY`` (or, when no executor is
            given, ``MESOS_LIBEXEC_DIRECTORY``) is missing from the
            environment.
    """

    launch = recv_proto(Launch)

    # Acquire a lock for this container
    with container_lock(launch.container_id.value):

        logger.info("Preparing to launch container %s", launch.container_id.value)

        # Build up the docker arguments
        arguments = []

        # Set the container ID
        arguments.extend(["--name", launch.container_id.value])

        # Configure the docker network to share the hosts
        arguments.extend(["--net", "host"])

        # Configure the user
        if launch.HasField("user"):
            arguments.extend(["-u", launch.user])

        # Figure out where the executor is. Everything derived from the
        # command (URIs, environment, image, options) comes from the same
        # source as the executor itself.
        if launch.HasField("executor_info"):
            command = launch.executor_info.command
            executor = command.value
        else:
            logger.info("No executor given, launching with mesos-executor")
            command = launch.task_info.command
            executor = "%s/mesos-executor" % os.environ['MESOS_LIBEXEC_DIRECTORY']

        # Environment variables
        for env in command.environment.variables:
            arguments.extend(["-e", "%s=%s" % (env.name, env.value)])

        # Download the URIs
        logger.info("Fetching URIs")
        if fetch_uris(launch.directory, command.uris) > 0:
            logger.error("Mesos fetcher returned bad exit code")
            exit(1)

        # Link the mesos native library
        native_library = os.environ['MESOS_NATIVE_LIBRARY']
        arguments.extend(["-v", "%s:/usr/lib/%s" % (native_library, os.path.basename(native_library))])

        # Set the resource configuration
        cpus = 0.0
        max_memory = 0
        ports = set()

        # Grab the resources from the task and executor
        resource_sets = (launch.task_info.resources,
                         launch.executor_info.resources)
        for resources in resource_sets:
            for resource in resources:
                if resource.name == "cpus":
                    # Summed as a float so fractional CPUs are not
                    # truncated to zero before being scaled to shares.
                    cpus += resource.scalar.value
                elif resource.name == "mem":
                    max_memory += int(resource.scalar.value)
                elif resource.name == "ports":
                    for port_range in resource.ranges.range:
                        ports.update(range(port_range.begin, port_range.end + 1))

        cpu_shares = int(cpus * 256)
        if cpu_shares > 0:
            arguments.extend(["-c", str(cpu_shares)])
        if max_memory > 0:
            arguments.extend(["-m", "%dm" % max_memory])
        # Sorted so the generated command line is deterministic
        for port in sorted(ports):
            arguments.extend(["-p", ":%i" % port])

        logger.info("Configured with executor %s", executor)

        # Add the sandbox directory
        arguments.extend(["-v", "%s:/mesos-sandbox" % (launch.directory)])
        arguments.extend(["-w", "/mesos-sandbox"])

        # Set the MESOS_DIRECTORY environment variable to the sandbox mount point
        arguments.extend(["-e", "MESOS_DIRECTORY=/mesos-sandbox"])

        # Pass through the rest of the mesos environment variables
        mesos_env = ["MESOS_FRAMEWORK_ID", "MESOS_EXECUTOR_ID",
                     "MESOS_SLAVE_ID", "MESOS_CHECKPOINT",
                     "MESOS_SLAVE_PID", "MESOS_RECOVERY_TIMEOUT",
                     "MESOS_NATIVE_LIBRARY"]
        for key in mesos_env:
            if key in os.environ:
                arguments.extend(["-e", "%s=%s" % (key, os.environ[key])])

        # Parse the container image and any extra docker options. Each
        # option string is split into individual argv entries, so the
        # pieces are added flat rather than as a nested list.
        image = command.container.image
        extra_args = []
        for option in command.container.options:
            extra_args.extend(option.split(" "))

        # Fall back to the default image; .get() keeps the error path below
        # reachable when the variable is not set at all.
        if not image:
            image = os.environ.get("MESOS_DEFAULT_CONTAINER_IMAGE")
        if not image:
            logger.error("No default container image")
            exit(1)

        # Drop any URL scheme from the image, keeping host and path
        url = urlparse(image)
        image = url.netloc + url.path

        # Pull the image
        logger.info("Pulling latest docker image: %s", image)
        _, _, return_code = invoke_docker("pull", [image])
        if return_code > 0:
            logger.error("Failed to pull image (%d)", return_code)
            exit(1)

        # TODO(tarnfeld): Locking

        # NOTE(review): "--net host" is also added to `arguments` above, so
        # both network flags end up on the command line - confirm which
        # network mode is intended.
        run_arguments = [
            "-d",  # Enable daemon mode
            "--net=bridge"  # Bridge the network with the host
        ]

        run_arguments.extend(arguments)
        run_arguments.extend(extra_args)
        run_arguments.extend(["-e", "GLOG_v=5"])
        run_arguments.append(image)
        run_arguments.extend(["sh", "-c"])
        run_arguments.append(executor + " >> stdout 2>>stderr")

        logger.info("Launching docker container")
        _, _, return_code = invoke_docker("run", run_arguments)

        if return_code > 0:
            logger.error("Failed to launch container")
            exit(1)
Ejemplo n.º 9
0
def usage():
    """
    Retrieve usage information about a running container.

    Reads a ``Usage`` message via ``recv_proto``, resolves the container's
    full ID from ``inspect_container`` and fills a ``ResourceStatistics``
    message from the container's metrics (``cpu.shares``, ``cpuacct.stat``,
    ``cpu.stat``, ``memory.limit_in_bytes``, ``memory.usage_in_bytes`` and
    ``memory.stat``) before sending it back with ``send_proto``.

    Collection is best-effort: a failure in any one group of metrics is
    logged with its traceback and the remaining groups are still collected.
    The process exits with status 1 if the clock tick rate is not positive.

    Raises:
        Exception: if the inspect data contains neither an "ID" nor an
            "Id" key.
    """

    usage = recv_proto(Usage)
    logger.info("Retrieving usage for container %s", usage.container_id.value)

    # Find the lxc container ID
    info = inspect_container(usage.container_id.value)
    lxc_container_id = info.get("ID", info.get("Id"))

    if lxc_container_id is None:
        raise Exception("Failed to get full container ID")

    logger.info("Using LXC container ID %s", lxc_container_id)

    stats = ResourceStatistics()
    stats.timestamp = int(time.time())

    # Get the number of CPU ticks
    ticks = os.sysconf("SC_CLK_TCK")
    if not ticks > 0:
        logger.error("Unable to retrieve number of CPU clock ticks")
        exit(1)

    # Retrieve the CPU stats. Only Exception is caught in the blocks below
    # so that SystemExit / KeyboardInterrupt are never swallowed.
    try:
        stats.cpus_limit = float(read_metric(lxc_container_id, "cpu.shares")) / 256
        cpu_stats = dict(read_metrics(lxc_container_id, "cpuacct.stat"))
        if "user" in cpu_stats and "system" in cpu_stats:
            # Values are divided by the tick rate to obtain seconds
            stats.cpus_user_time_secs = float(cpu_stats["user"]) / ticks
            stats.cpus_system_time_secs = float(cpu_stats["system"]) / ticks
    except Exception:
        logger.exception("Failed to get CPU usage")

    try:
        cpu_stats = dict(read_metrics(lxc_container_id, "cpu.stat"))
        if "nr_periods" in cpu_stats:
            stats.cpus_nr_periods = int(cpu_stats["nr_periods"])
        if "nr_throttled" in cpu_stats:
            stats.cpus_nr_throttled = int(cpu_stats["nr_throttled"])
        if "throttled_time" in cpu_stats:
            throttled_time_nano = int(cpu_stats["throttled_time"])
            # Float division: integer division would drop everything below
            # one whole second on Python 2.
            stats.cpus_throttled_time_secs = throttled_time_nano / 1e9
    except Exception:
        logger.exception("Failed to get detailed CPU usage")

    # Retrieve the mem stats
    try:
        stats.mem_limit_bytes = int(read_metric(lxc_container_id, "memory.limit_in_bytes"))
        stats.mem_rss_bytes = int(read_metric(lxc_container_id, "memory.usage_in_bytes"))
    except Exception:
        logger.exception("Failed to get memory usage")

    try:
        mem_stats = dict(read_metrics(lxc_container_id, "memory.stat"))
        if "total_cache" in mem_stats:
            stats.mem_file_bytes = int(mem_stats["total_cache"])
        if "total_rss" in mem_stats:
            stats.mem_anon_bytes = int(mem_stats["total_rss"])
        if "total_mapped_file" in mem_stats:
            stats.mem_mapped_file_bytes = int(mem_stats["total_mapped_file"])
    except Exception:
        logger.exception("Failed to get detailed memory usage")

    logger.debug("Container usage: %s", stats)

    # Send the stats back to mesos
    send_proto(stats)
def usage():
    """
    Retrieve usage information about a running container.

    Reads a ``Usage`` message via ``recv_proto``, resolves the container's
    full ID from ``inspect_container`` and fills a ``ResourceStatistics``
    message from the container's metrics (``cpu.shares``, ``cpuacct.stat``,
    ``cpu.stat``, ``memory.limit_in_bytes``, ``memory.usage_in_bytes`` and
    ``memory.stat``) before sending it back with ``send_proto``.

    Collection is best-effort: a failure in any one group of metrics is
    logged with its traceback and the remaining groups are still collected.
    The process exits with status 1 if the clock tick rate is not positive.

    Raises:
        Exception: if the inspect data contains neither an "ID" nor an
            "Id" key.
    """

    usage = recv_proto(Usage)
    logger.info("Retrieving usage for container %s", usage.container_id.value)

    # Find the lxc container ID; the key is looked up under both spellings
    info = inspect_container(usage.container_id.value)
    lxc_container_id = info.get("ID", info.get("Id"))

    if lxc_container_id is None:
        raise Exception("Failed to get full container ID")

    logger.info("Using LXC container ID %s", lxc_container_id)

    stats = ResourceStatistics()
    stats.timestamp = int(time.time())

    # Get the number of CPU ticks
    ticks = os.sysconf("SC_CLK_TCK")
    if not ticks > 0:
        logger.error("Unable to retrieve number of CPU clock ticks")
        exit(1)

    # Retrieve the CPU stats. Only Exception is caught in the blocks below
    # so that SystemExit / KeyboardInterrupt are never swallowed.
    try:
        stats.cpus_limit = float(read_metric(lxc_container_id,
                                             "cpu.shares")) / 256
        cpu_stats = dict(read_metrics(lxc_container_id, "cpuacct.stat"))
        if "user" in cpu_stats and "system" in cpu_stats:
            # Values are divided by the tick rate to obtain seconds
            stats.cpus_user_time_secs = float(cpu_stats["user"]) / ticks
            stats.cpus_system_time_secs = float(cpu_stats["system"]) / ticks
    except Exception:
        logger.exception("Failed to get CPU usage")

    try:
        cpu_stats = dict(read_metrics(lxc_container_id, "cpu.stat"))
        if "nr_periods" in cpu_stats:
            stats.cpus_nr_periods = int(cpu_stats["nr_periods"])
        if "nr_throttled" in cpu_stats:
            stats.cpus_nr_throttled = int(cpu_stats["nr_throttled"])
        if "throttled_time" in cpu_stats:
            throttled_time_nano = int(cpu_stats["throttled_time"])
            # Float division: integer division would drop everything below
            # one whole second on Python 2.
            stats.cpus_throttled_time_secs = throttled_time_nano / 1e9
    except Exception:
        logger.exception("Failed to get detailed CPU usage")

    # Retrieve the mem stats
    try:
        stats.mem_limit_bytes = int(
            read_metric(lxc_container_id, "memory.limit_in_bytes"))
        stats.mem_rss_bytes = int(
            read_metric(lxc_container_id, "memory.usage_in_bytes"))
    except Exception:
        logger.exception("Failed to get memory usage")

    try:
        mem_stats = dict(read_metrics(lxc_container_id, "memory.stat"))
        if "total_cache" in mem_stats:
            stats.mem_file_bytes = int(mem_stats["total_cache"])
        if "total_rss" in mem_stats:
            stats.mem_anon_bytes = int(mem_stats["total_rss"])
        if "total_mapped_file" in mem_stats:
            stats.mem_mapped_file_bytes = int(mem_stats["total_mapped_file"])
    except Exception:
        logger.exception("Failed to get detailed memory usage")

    logger.debug("Container usage: %s", stats)

    # Send the stats back to mesos
    send_proto(stats)
Ejemplo n.º 11
0
def launch():
    """
    Launch a new Mesos executor in a Docker container.

    Reads a ``Launch`` message via ``recv_proto`` and, while holding the
    container's lock:

    1. builds the ``docker run`` options (name, user, environment, volumes,
       resource limits and published ports),
    2. fetches the command's URIs into the sandbox with ``fetch_uris``,
    3. resolves the container image, and
    4. starts the container detached, running the executor through
       ``sh -c`` with its output appended to ``stdout``/``stderr``.

    The executor's command is used when the message carries
    ``executor_info``; otherwise the task's command is used together with
    ``mesos-executor``. The image and extra docker options are taken from
    that same command.

    The process exits with status 1 if fetching or running fails, or if no
    container image can be determined.

    Raises:
        KeyError: if ``MESOS_NATIVE_LIBRARY`` (or, when no executor is
            given, ``MESOS_LIBEXEC_DIRECTORY``) is missing from the
            environment.
    """

    launch = recv_proto(Launch)

    # Acquire a lock for this container
    with container_lock(launch.container_id.value):

        logger.info("Preparing to launch container %s", launch.container_id.value)

        # Build up the docker arguments
        arguments = []

        # Set the container ID
        arguments.extend(["--name", launch.container_id.value])

        # Configure the user
        if launch.HasField("user"):
            arguments.extend(["-u", launch.user])

        # Figure out where the executor is. The same command is used for
        # the URIs, environment, image and options so they cannot come from
        # different sources.
        if launch.HasField("executor_info"):
            command = launch.executor_info.command
            executor = command.value
        else:
            logger.info("No executor given, launching with mesos-executor")
            command = launch.task_info.command
            executor = "%s/mesos-executor" % os.environ['MESOS_LIBEXEC_DIRECTORY']

        # Environment variables
        for env in command.environment.variables:
            arguments.extend(["-e", "%s=%s" % (env.name, env.value)])

        # Download the URIs
        logger.info("Fetching URIs")
        if fetch_uris(launch.directory, command.uris) > 0:
            logger.error("Mesos fetcher returned bad exit code")
            exit(1)

        # Link the mesos native library
        native_library = os.environ['MESOS_NATIVE_LIBRARY']
        arguments.extend(["-v", "%s:/usr/lib/%s" % (native_library, os.path.basename(native_library))])

        # Set the resource configuration
        for resource in launch.task_info.resources:
            if resource.name == "cpus":
                arguments.extend(["-c", str(int(resource.scalar.value * 256))])
            elif resource.name == "mem":
                arguments.extend(["-m", "%dm" % (int(resource.scalar.value))])
            elif resource.name == "ports":
                for port_range in resource.ranges.range:
                    # Range end is inclusive; each port maps to itself
                    for port in range(port_range.begin, port_range.end + 1):
                        arguments.extend(["-p", "%i:%i" % (port, port)])

        logger.info("Configured with executor %s", executor)

        # Add the sandbox directory
        arguments.extend(["-v", "%s:/mesos-sandbox" % (launch.directory)])
        arguments.extend(["-w", "/mesos-sandbox"])

        # Set the MESOS_DIRECTORY environment variable to the sandbox mount point
        arguments.extend(["-e", "MESOS_DIRECTORY=/mesos-sandbox"])

        # Pass through the rest of the mesos environment variables
        mesos_env = ["MESOS_FRAMEWORK_ID", "MESOS_EXECUTOR_ID",
                     "MESOS_SLAVE_ID", "MESOS_CHECKPOINT",
                     "MESOS_SLAVE_PID", "MESOS_RECOVERY_TIMEOUT"]
        for key in mesos_env:
            if key in os.environ:
                arguments.extend(["-e", "%s=%s" % (key, os.environ[key])])

        # Parse the container image and any extra docker options. Each
        # option string is split into individual argv entries, so the
        # pieces are added flat rather than as a nested list.
        image = command.container.image
        extra_args = []
        for option in command.container.options:
            extra_args.extend(option.split(" "))

        # Fall back to the default image; .get() keeps the error path below
        # reachable when the variable is not set at all.
        if not image:
            image = os.environ.get("MESOS_DEFAULT_CONTAINER_IMAGE")
        if not image:
            logger.error("No default container image")
            exit(1)

        # Drop any URL scheme from the image, keeping host and path
        url = urlparse(image)
        image = url.netloc + url.path

        # TODO(tarnfeld): Locking

        run_arguments = [
            "-d",  # Enable daemon mode
            "--net=bridge"  # Bridge the network with the host
        ]

        run_arguments.extend(arguments)
        run_arguments.extend(extra_args)
        run_arguments.extend(["-e", "GLOG_v=5"])
        run_arguments.append(image)
        run_arguments.extend(["sh", "-c"])
        run_arguments.append(executor + " >> stdout 2>>stderr")

        logger.info("Launching docker container")
        _, _, return_code = invoke_docker("run", run_arguments)

        if return_code > 0:
            logger.error("Failed to launch container")
            exit(1)