Example 1
    async def run_on_docker(
        self,
        docker_client: aiodocker.Docker,
        session: aiohttp.ClientSession,
        task_id: str,
        *,
        run_config: Dict[str, Any],
    ) -> Optional[str]:
        """Runs the container image defined in the step's properties.

        Running is done asynchronously; the container is awaited so
        that a step in a dependency graph (like a pipeline) only
        executes once all its proper ancestors have completed.

        Args:
            docker_client: Docker environment to run containers (async).
            session: HTTP session through which step status updates are
                reported (via ``update_status``).
            task_id: UUID of the (pipeline) run this step is part of.
            run_config: Configuration of the run, e.g. containing the
                "run_endpoint", "project_dir", "pipeline_path" and
                "env_uuid_docker_id_mappings" entries used below.

        Returns:
            The status of the step after this call: "SUCCESS" or
            "FAILURE" once run, or the unchanged current status if the
            step could not (yet) be started.
        """
        if not all([parent._status == "SUCCESS" for parent in self.parents]):
            # The step cannot be run yet.
            return self._status

        if self._status != "PENDING":
            # The step has already been started.

            # Each parent attempts to start their children when they
            # finish. When all parents finish simultaneously (with all
            # their _status'es being "SUCCESS") not checking whether
            # the child has started or not would lead to multiple start
            # attempts of the child, resulting in errors.
            return self._status

        # TODO: better error handling?
        self._status = "STARTED"
        await update_status(
            self._status,
            task_id,
            session,
            type="step",
            run_endpoint=run_config["run_endpoint"],
            uuid=self.properties["uuid"],
        )

        orchest_mounts = get_orchest_mounts(
            project_dir=_config.PROJECT_DIR,
            host_project_dir=run_config["project_dir"],
            mount_form="docker-engine",
        )

        # add volume mount
        orchest_mounts += get_volume_mounts(run_config, task_id)

        device_requests = get_device_requests(
            self.properties["environment"],
            run_config["project_uuid"],
            form="docker-engine",
        )

        # The working directory relative to the project directory is
        # based on the location of the pipeline, e.g. if the pipeline is
        # in /project-dir/my/project/path/mypipeline.orchest the working
        # directory will be my/project/path/.
        working_dir = os.path.split(run_config["pipeline_path"])[0]

        config = {
            "Image":
            run_config["env_uuid_docker_id_mappings"][
                self.properties["environment"]],
            "Env": [
                f'ORCHEST_STEP_UUID={self.properties["uuid"]}',
                f'ORCHEST_PIPELINE_UUID={run_config["pipeline_uuid"]}',
                f'ORCHEST_PIPELINE_PATH={run_config["pipeline_path"]}',
                f'ORCHEST_PROJECT_UUID={run_config["project_uuid"]}',
                # ORCHEST_MEMORY_EVICTION is never present when running
                # notebooks interactively and otherwise always present,
                # this means eviction of objects from memory can never
                # be triggered when running notebooks interactively.
                # This environment variable being present implies that
                # the Orchest SDK will always emit an eviction message
                # given the choice, this however, does not imply that
                # eviction will actually take place, since the memory
                # server manager will check the pipeline definition
                # settings to decide whether object eviction should
                # take place or not.
                "ORCHEST_MEMORY_EVICTION=1",
            ],
            "HostConfig": {
                "Binds": orchest_mounts,
                "DeviceRequests": device_requests,
                "GroupAdd": [os.environ.get("ORCHEST_HOST_GID")],
            },
            "Cmd": [
                "/orchest/bootscript.sh",
                "runnable",
                working_dir,
                self.properties["file_path"],
            ],
            "NetworkingConfig": {
                # TODO: should not be hardcoded.
                "EndpointsConfig": {
                    "orchest": {}
                }
            },
            # NOTE: the `'tests-uuid'` key is only used for tests and
            # gets ignored by the `docker_client`.
            "tests-uuid":
            self.properties["uuid"],
        }

        # Starts the container asynchronously, however, it does not wait
        # for completion of the container (like the `docker run` CLI
        # command does). Therefore the option to await the container
        # completion is introduced.
        try:
            container = await docker_client.containers.run(
                config=config,
                name=_config.PIPELINE_STEP_CONTAINER_NAME.format(
                    run_uuid=task_id, step_uuid=self.properties["uuid"]),
            )

            data = await container.wait()

            # The status code will be 0 for "SUCCESS" and -N otherwise.
            # A negative value -N indicates that the child was
            # terminated by signal N (POSIX only).
            if data.get("StatusCode") != 0:
                self._status = "FAILURE"
                logging.error(
                    "Docker container for step %s failed with output:\n%s" % (
                        self.properties["uuid"],
                        "".join(await container.log(stdout=True, stderr=True)),
                    ))
            else:
                self._status = "SUCCESS"

        except Exception as e:
            logging.error("Failed to run Docker container: %s" % e)
            self._status = "FAILURE"

        finally:
            await update_status(
                self._status,
                task_id,
                session,
                type="step",
                run_endpoint=run_config["run_endpoint"],
                uuid=self.properties["uuid"],
            )

        return self._status
Example 2
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    """Launch a containerized Jupyter kernel.

    The kernel image is taken from the ``KERNEL_IMAGE`` environment
    variable and is started either as a Docker swarm service (when the
    module-level ``swarm_mode`` flag is set) or as a plain container.

    Args:
        kernel_id: Kernel UUID; used in the container name and labels.
        response_addr: Address the kernel reports back to; exported to
            the container as ``EG_RESPONSE_ADDRESS``.
        spark_context_init_mode: Exported to the container as
            ``KERNEL_SPARK_CONTEXT_INIT_MODE``.
    """
    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE")
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID.
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated,
    # fall back to 'bridge'...
    docker_network = os.environ.get("EG_DOCKER_NETWORK", "bridge")

    # Build labels - these will be modelled similar to kubernetes:
    # kernel_id, component, app, ...
    labels = {
        "kernel_id": kernel_id,
        "component": "kernel",
        "app": "enterprise-gateway",
    }

    # Capture env parameters...
    param_env = {
        "EG_RESPONSE_ADDRESS": response_addr,
        "KERNEL_SPARK_CONTEXT_INIT_MODE": spark_context_init_mode,
    }

    # Since the environment is specific to the kernel (per env stanza of
    # kernelspec, KERNEL_ and ENV_WHITELIST) just add the env here.
    param_env.update(os.environ)
    # Let the image PATH be used. Since this is relative to images,
    # we're probably safe. Supply a default so a missing PATH cannot
    # raise KeyError.
    param_env.pop("PATH", None)

    user = param_env.get("KERNEL_UID")
    group = param_env.get("KERNEL_GID")

    # Setup common args.
    kwargs = {
        "name": container_name,
        "user": user,
        "labels": labels,
    }

    client = DockerClient.from_env()
    if swarm_mode:
        print("Started Jupyter kernel in swarm-mode")
        networks = [docker_network]
        mounts = [
            "/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro"
        ]
        endpoint_spec = EndpointSpec(mode="dnsrr")
        restart_policy = RestartPolicy(condition="none")

        # finish args setup
        kwargs["env"] = param_env
        kwargs["endpoint_spec"] = endpoint_spec
        kwargs["restart_policy"] = restart_policy
        kwargs["container_labels"] = labels
        kwargs["networks"] = networks
        kwargs["groups"] = [group, "100"]
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["workdir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['mounts'] = mounts   # Enable if necessary
        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        print("Started Jupyter kernel in normal docker mode")

        # Note: seems to me that the kernels don't need to be mounted on
        # a container that runs a single kernel.

        # Mount the kernel working directory from EG to kernel container.

        # finish args setup
        kwargs["hostname"] = container_name
        kwargs["environment"] = param_env
        kwargs["remove"] = remove_container
        kwargs["network"] = docker_network
        kwargs["group_add"] = [
            group,
            "100",
        ]  # NOTE: "group_add" for newer versions of docker
        kwargs["detach"] = True
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")

        # print("container args: {}".format(kwargs))  # useful for debug
        orchest_mounts = get_orchest_mounts(
            project_dir=param_env.get("KERNEL_WORKING_DIR"),
            host_project_dir=param_env.get("ORCHEST_HOST_PROJECT_DIR"),
        )
        orchest_mounts += [
            get_volume_mount(
                param_env.get("ORCHEST_PIPELINE_UUID"),
                param_env.get("ORCHEST_PROJECT_UUID"),
            )
        ]

        # Extract environment_uuid from the image name (last 36 chars).
        extracted_environment_uuid = image_name[-36:]

        device_requests = get_device_requests(
            extracted_environment_uuid, param_env.get("ORCHEST_PROJECT_UUID")
        )

        kernel_container = client.containers.run(
            image_name, mounts=orchest_mounts, device_requests=device_requests, **kwargs
        )
Example 3
    async def run_on_docker(
        self,
        docker_client: aiodocker.Docker,
        session: aiohttp.ClientSession,
        task_id: str,
        *,
        run_config: Dict[str, Any],
    ) -> Optional[str]:
        """Runs the container image defined in the step's properties.

        Running is done asynchronously; the container is awaited so
        that a step in a dependency graph (like a pipeline) only
        executes once all its proper ancestors have completed.

        Args:
            docker_client: Docker environment to run containers (async).
            session: HTTP session through which step status updates are
                reported (via ``update_status``).
            task_id: UUID of the (pipeline) run this step is part of.
            run_config: Configuration of the run, e.g. containing the
                "run_endpoint", "project_dir" and "pipeline_path"
                entries used below.

        Returns:
            The status of the step after this call: "SUCCESS" or
            "FAILURE" once run, or the unchanged current status if the
            step could not (yet) be started.
        """
        if not all(parent._status == "SUCCESS" for parent in self.parents):
            # The step cannot be run yet.
            return self._status

        orchest_mounts = get_orchest_mounts(
            project_dir=_config.PROJECT_DIR,
            host_project_dir=run_config["project_dir"],
            mount_form="docker-engine",
        )

        # add volume mount
        orchest_mounts += get_volume_mounts(run_config, task_id)

        device_requests = get_device_requests(
            self.properties["environment"],
            run_config["project_uuid"],
            form="docker-engine",
        )

        # The working directory relative to the project directory is
        # based on the location of the pipeline, e.g. if the pipeline is
        # in /project-dir/my/project/path/mypipeline.orchest the working
        # directory will be my/project/path/.
        working_dir = os.path.split(run_config["pipeline_path"])[0]

        config = {
            "Image":
            _config.ENVIRONMENT_IMAGE_NAME.format(
                project_uuid=run_config["project_uuid"],
                environment_uuid=self.properties["environment"],
            ),
            "Env": [
                f'ORCHEST_STEP_UUID={self.properties["uuid"]}',
                f'ORCHEST_PIPELINE_UUID={run_config["pipeline_uuid"]}',
                f'ORCHEST_PIPELINE_PATH={run_config["pipeline_path"]}',
                f'ORCHEST_PROJECT_UUID={run_config["project_uuid"]}',
                "ORCHEST_MEMORY_EVICTION=1",
            ],
            "HostConfig": {
                "Binds": orchest_mounts,
                "DeviceRequests": device_requests,
            },
            "Cmd": [
                "/orchest/bootscript.sh",
                "runnable",
                working_dir,
                self.properties["file_path"],
            ],
            "NetworkingConfig": {
                "EndpointsConfig": {
                    "orchest": {}
                }  # TODO: should not be hardcoded.
            },
            # NOTE: the `'tests-uuid'` key is only used for tests and
            # gets ignored by the `docker_client`.
            "tests-uuid":
            self.properties["uuid"],
        }

        # Starts the container asynchronously, however, it does not wait
        # for completion of the container (like the `docker run` CLI
        # command does). Therefore the container is awaited explicitly
        # below.
        try:
            container = await docker_client.containers.run(config=config)
        except Exception as e:
            # The original code only printed the exception and then fell
            # through to `container.wait()`, raising a NameError on the
            # unbound `container`. Mark the step as failed and report it
            # instead.
            print("Exception", e)
            self._status = "FAILURE"
            await update_status(
                self._status,
                task_id,
                session,
                type="step",
                run_endpoint=run_config["run_endpoint"],
                uuid=self.properties["uuid"],
            )
            return self._status

        self._status = "STARTED"
        await update_status(
            self._status,
            task_id,
            session,
            type="step",
            run_endpoint=run_config["run_endpoint"],
            uuid=self.properties["uuid"],
        )

        data = await container.wait()

        # The status code will be 0 for "SUCCESS" and -N otherwise. A
        # negative value -N indicates that the child was terminated
        # by signal N (POSIX only).
        self._status = "FAILURE" if data.get("StatusCode") else "SUCCESS"
        await update_status(
            self._status,
            task_id,
            session,
            type="step",
            run_endpoint=run_config["run_endpoint"],
            uuid=self.properties["uuid"],
        )

        # TODO: get the logs (errors are piped to stdout, thus running
        #       "docker logs" should get them). Find the appropriate
        #       way to return them.

        return self._status
Example 4
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    """Launch a containerized Jupyter kernel in normal (non-swarm) mode.

    The kernel image is taken from the ``KERNEL_IMAGE`` environment
    variable; Orchest-specific volumes and device requests are attached
    before the container is started.

    Args:
        kernel_id: Kernel UUID; used in the container name and labels.
        response_addr: Address the kernel reports back to; exported to
            the container as ``EG_RESPONSE_ADDRESS``.
        spark_context_init_mode: Exported to the container as
            ``KERNEL_SPARK_CONTEXT_INIT_MODE``.
    """
    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE")
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID.
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated,
    # fall back to 'bridge'...
    docker_network = os.environ.get("EG_DOCKER_NETWORK", "bridge")

    # Build labels - these will be modelled similar to kubernetes:
    # kernel_id, component, app, ...
    labels = {
        "kernel_id": kernel_id,
        "component": "kernel",
        "app": "enterprise-gateway",
    }

    # Capture env parameters...
    param_env = {
        "EG_RESPONSE_ADDRESS": response_addr,
        "KERNEL_SPARK_CONTEXT_INIT_MODE": spark_context_init_mode,
    }

    # Since the environment is specific to the kernel (per env stanza of
    # kernelspec, KERNEL_ and ENV_WHITELIST) just add the env here.
    param_env.update(os.environ)
    # Let the image PATH be used. Since this is relative to images,
    # we're probably safe. Supply a default so a missing PATH cannot
    # raise KeyError.
    param_env.pop("PATH", None)

    # Setup common args.
    kwargs = {
        "name": container_name,
        "labels": labels,
    }

    client = DockerClient.from_env()
    print("Started Jupyter kernel in normal docker mode")

    # Note: seems to me that the kernels don't need to be mounted on a
    # container that runs a single kernel mount the kernel working
    # directory from EG to kernel container

    # finish args setup
    kwargs["hostname"] = container_name
    kwargs["environment"] = param_env
    kwargs["remove"] = remove_container
    kwargs["network"] = docker_network
    kwargs["group_add"] = [param_env.get("ORCHEST_HOST_GID")]
    kwargs["detach"] = True
    if param_env.get("KERNEL_WORKING_DIR"):
        kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")

    # print("container args: {}".format(kwargs))  # useful for debug
    orchest_mounts = get_orchest_mounts(
        project_dir=_config.PROJECT_DIR,
        host_user_dir=os.path.join(param_env.get("ORCHEST_HOST_PROJECT_DIR"),
                                   os.pardir, os.pardir, "data"),
        host_project_dir=param_env.get("ORCHEST_HOST_PROJECT_DIR"),
    )
    volume_source, volume_spec = get_volume_mount(
        param_env.get("ORCHEST_PIPELINE_UUID"),
        param_env.get("ORCHEST_PROJECT_UUID"),
    )
    orchest_mounts[volume_source] = volume_spec

    # Extract environment_uuid from the image name (last 36 characters).
    extracted_environment_uuid = image_name[-36:]

    device_requests = get_device_requests(
        extracted_environment_uuid, param_env.get("ORCHEST_PROJECT_UUID"))

    client.containers.run(image_name,
                          volumes=orchest_mounts,
                          device_requests=device_requests,
                          **kwargs)