Example #1
    def submit(self, data: Data, argv):
        """Run process with SLURM.

        For details, see
        :meth:`~resolwe.flow.managers.workload_connectors.base.BaseConnector.submit`.
        """
        limits = data.get_resource_limits()
        logger.debug(
            __(
                "Connector '{}' running for Data with id {} ({}).",
                self.__class__.__module__,
                data.id,
                repr(argv),
            ))

        # Compute target partition.
        partition = getattr(settings, "FLOW_SLURM_PARTITION_DEFAULT", None)
        if data.process.slug in getattr(settings, "FLOW_SLURM_PARTITION_OVERRIDES", {}):
            partition = settings.FLOW_SLURM_PARTITION_OVERRIDES[data.process.slug]

        try:
            # Make sure the resulting file is executable on creation.
            runtime_dir = storage_settings.FLOW_VOLUMES["runtime"]["config"]["path"]
            script_path = os.path.join(runtime_dir, "slurm-{}.sh".format(data.pk))
            file_descriptor = os.open(script_path, os.O_WRONLY | os.O_CREAT, mode=0o555)
            with os.fdopen(file_descriptor, "wt") as script:
                script.write("#!/bin/bash\n")
                script.write(
                    "#SBATCH --mem={}M\n".format(
                        limits["memory"] + EXECUTOR_MEMORY_OVERHEAD
                    )
                )
                script.write("#SBATCH --cpus-per-task={}\n".format(limits["cores"]))
                if partition:
                    script.write("#SBATCH --partition={}\n".format(partition))
                # The job log location does not depend on the partition choice.
                script.write(
                    "#SBATCH --output slurm-url-{}-job-%j.out\n".format(
                        data.location.subpath
                    )
                )

                # Render the argument vector into a command line.
                line = " ".join(map(shlex.quote, argv))
                script.write(line + "\n")

            command = ["/usr/bin/env", "sbatch", script_path]
            subprocess.Popen(command, cwd=runtime_dir, stdin=subprocess.DEVNULL).wait()
        except OSError as err:
            logger.error(
                __(
                    "OSError occurred while preparing SLURM script for Data {}: {}",
                    data.id,
                    err,
                ))
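
The partition selection above is driven entirely by Django settings. The sketch below shows how those settings might be configured, assuming only the setting names that appear in the example (FLOW_SLURM_PARTITION_DEFAULT and FLOW_SLURM_PARTITION_OVERRIDES); the partition names and process slugs are purely illustrative.

# settings.py -- illustrative values only.
# Partition used for every Data object unless overridden below.
FLOW_SLURM_PARTITION_DEFAULT = "general"

# Per-process overrides, keyed by process slug (hypothetical slugs and
# partition names).
FLOW_SLURM_PARTITION_OVERRIDES = {
    "alignment-bwa-mem": "highmem",
    "rna-seq-quantification": "bigcpu",
}

With settings like these, a process whose slug appears in the overrides dictionary is submitted to its dedicated partition, while every other process gets "#SBATCH --partition=general".
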
Example #2
    def start(self, data: Data, listener_connection: Tuple[str, str, str]):
        """Start process execution.

        Construct kubernetes job description and pass it to the kubernetes.
        """
        container_environment = self._prepare_environment(
            data, listener_connection)

        location_subpath = Path(data.location.subpath)

        # Create the Kubernetes API client on every call; a long-lived client
        # would eventually time out and raise an API exception.
        try:
            kubernetes.config.load_kube_config()
        except kubernetes.config.config_exception.ConfigException:
            kubernetes.config.load_incluster_config()

        batch_api = kubernetes.client.BatchV1Api()
        core_api = kubernetes.client.CoreV1Api()

        container_name_prefix = (
            getattr(settings, "FLOW_EXECUTOR", {})
            .get("CONTAINER_NAME_PREFIX", "resolwe")
            .replace("_", "-")
            .lower()
        )
        container_name = self._generate_container_name(container_name_prefix, data.pk)

        # Set resource limits.
        requests = dict()
        limits = data.get_resource_limits()

        requests["cpu"] = limits.pop("cores")
        limits["cpu"] = requests["cpu"] + 1
        # Overcommit CPU by 20%.
        requests["cpu"] *= 0.8

        # The memory in the database is stored in megabytes, but Kubernetes
        # requires memory in bytes.
        # We request 10% less memory than stored in the database and set the
        # limit at 10% more plus KUBERNETES_MEMORY_HARD_LIMIT_BUFFER. Processes
        # usually require 16GB, 32GB, ... of memory, and since a node usually
        # has 64GB of memory, part of which is consumed by system processes,
        # requesting the full amount would let only one 32GB process run on a
        # node instead of two.
        requests["memory"] = 0.9 * limits["memory"]
        limits["memory"] = 1.1 * limits["memory"] + KUBERNETES_MEMORY_HARD_LIMIT_BUFFER
        limits["memory"] *= 2**20  # Convert megabytes to bytes (2**20 = one mebibyte).
        requests["memory"] *= 2**20

        # Get the limits and requests for the communicator container.
        communicator_limits = getattr(
            settings,
            "FLOW_KUBERNETES_COMMUNICATOR_LIMITS",
            {"memory": "256M", "cpu": 0.1},
        )
        communicator_requests = getattr(
            settings,
            "FLOW_KUBERNETES_COMMUNICATOR_REQUESTS",
            {"memory": "256M", "cpu": 0.1},
        )

        resources = data.process.requirements.get("resources", {})
        network = "bridge"
        use_host_network = False
        if "network" in resources:
            # Configure Docker network mode for the container (if specified).
            # By default, current Docker versions use the 'bridge' mode which
            # creates a network stack on the default Docker bridge.
            network = getattr(settings, "FLOW_EXECUTOR", {}).get("NETWORK", "")
            use_host_network = network == "host"

        # Generate and set seccomp policy to limit syscalls.
        security_context = {
            "runAsUser": os.getuid(),
            "runAsGroup": os.getgid(),
            "allowPrivilegeEscalation": False,
            "privileged": False,
            "capabilities": {
                "drop": ["ALL"]
            },
        }

        annotations = dict()

        # Do not evict job from node.
        annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"

        if not getattr(settings, "FLOW_DOCKER_DISABLE_SECCOMP", False):
            # The path is relative to the kubelet's seccomp root directory:
            # <seccomp_root>/<path>, where <seccomp_root> is defined via the
            # --seccomp-profile-root flag on the kubelet. If that flag is not
            # set, the default path <root-dir>/seccomp is used, where
            # <root-dir> is specified by the --root-dir flag.
            # https://kubernetes.io/docs/concepts/policy/pod-security-policy/
            #
            # The profile file itself still has to be distributed to the
            # kubelets (for instance with a DaemonSet); in a local minikube
            # setup this can be done by mounting a host directory (such as
            # /tmp) to the /seccomp directory.
            annotations[
                "seccomp.security.alpha.kubernetes.io/pod"
            ] = "runtime/default"

        mapper = getattr(settings, "FLOW_CONTAINER_IMAGE_MAP", {})
        communicator_image = getattr(
            settings,
            "FLOW_DOCKER_COMMUNICATOR_IMAGE",
            "public.ecr.aws/s4q6j6e8/resolwe/com:latest",
        )
        communicator_image = self._image_mapper(communicator_image, mapper)

        requirements = data.process.requirements.get("executor", {}).get("docker", {})
        processing_container_image = str(
            requirements.get(
                "image",
                getattr(
                    settings,
                    "FLOW_DOCKER_DEFAULT_PROCESSING_CONTAINER_IMAGE",
                    "public.ecr.aws/s4q6j6e8/resolwe/base:ubuntu-20.04",
                ),
            )
        )
        processing_container_image = self._image_mapper(
            processing_container_image, mapper
        )

        affinity = {}
        kubernetes_affinity = getattr(settings, "FLOW_KUBERNETES_AFFINITY", None)
        if kubernetes_affinity:
            affinity = {
                "nodeAffinity": {
                    "requiredDuringSchedulingIgnoredDuringExecution": {
                        "nodeSelectorTerms": [
                            {
                                "matchExpressions": [
                                    {
                                        "key": "nodegroup",
                                        "operator": "In",
                                        "values": [kubernetes_affinity],
                                    }
                                ]
                            }
                        ]
                    }
                }
            }

        job_type = dict(Process.SCHEDULING_CLASS_CHOICES)[
            data.process.scheduling_class
        ]
        job_description = {
            "apiVersion": "batch/v1",
            "kind": "Job",
            "metadata": {"name": sanitize_kubernetes_label(container_name)},
            "spec": {
                # Keep finished pods around for 300 seconds. If the job is not
                # deleted, its PVC claim persists and causes the PV to stay
                # around.
                # This can be changed by running a cron job that periodically
                # checks for PVCs that can be deleted.
                "ttlSecondsAfterFinished": 300,
                "template": {
                    "metadata": {
                        "name": sanitize_kubernetes_label(container_name),
                        "labels": {
                            "app": "resolwe",
                            "data_id": str(data.pk),
                            "process":
                            sanitize_kubernetes_label(data.process.slug),
                            "job_type": sanitize_kubernetes_label(job_type),
                        },
                        "annotations": annotations,
                    },
                    "spec": {
                        "affinity":
                        affinity,
                        "hostNetwork":
                        use_host_network,
                        "volumes":
                        self._volumes(data.id, location_subpath, core_api),
                        "initContainers": [
                            {
                                "name":
                                sanitize_kubernetes_label(
                                    f"{container_name}-init"),
                                "image":
                                communicator_image,
                                "imagePullPolicy":
                                "Always",
                                "workingDir":
                                "/",
                                "command": ["/usr/local/bin/python3"],
                                "args": ["-m", "executors.init_container"],
                                "securityContext": {
                                    "privileged": True
                                },
                                "volumeMounts":
                                self._init_container_mountpoints(),
                                "env":
                                container_environment,
                            },
                        ],
                        "containers": [
                            {
                                "name":
                                sanitize_kubernetes_label(container_name),
                                "image":
                                processing_container_image,
                                "resources": {
                                    "limits": limits,
                                    "requests": requests
                                },
                                "securityContext":
                                security_context,
                                "env":
                                container_environment,
                                "workingDir":
                                os.fspath(constants.PROCESSING_VOLUME),
                                "imagePullPolicy":
                                "Always",
                                "command": ["/usr/bin/python3"],
                                "args": ["/processing.py"],
                                "volumeMounts":
                                self._processing_mountpoints(
                                    location_subpath,
                                    data.process.run.get("language", None),
                                ),
                            },
                            {
                                "name":
                                sanitize_kubernetes_label(
                                    f"{container_name}-communicator"),
                                "image":
                                communicator_image,
                                "imagePullPolicy":
                                "Always",
                                "resources": {
                                    "limits": communicator_limits,
                                    "requests": communicator_requests,
                                },
                                "securityContext":
                                security_context,
                                "env":
                                container_environment,
                                "command": ["/usr/local/bin/python3"],
                                "args": ["/startup.py"],
                                "volumeMounts":
                                self._communicator_mountpoints(
                                    location_subpath),
                            },
                        ],
                        "restartPolicy":
                        "Never",
                    },
                },
                "backoffLimit": 0,
            },
        }
        start_time = time.time()

        processing_name = constants.PROCESSING_VOLUME_NAME
        input_name = constants.INPUTS_VOLUME_NAME
        if self._should_create_pvc(storage_settings.FLOW_VOLUMES[processing_name]):
            claim_name = unique_volume_name(
                storage_settings.FLOW_VOLUMES[processing_name]["config"]["name"],
                data.id,
            )
            # Default claim size is 200 gibibytes; convert to bytes.
            claim_size = limits.pop("storage", 200) * (2**30)
            core_api.create_namespaced_persistent_volume_claim(
                body=self._persistent_volume_claim(
                    claim_name,
                    claim_size,
                    storage_settings.FLOW_VOLUMES[processing_name]["config"],
                ),
                namespace=self.kubernetes_namespace,
                _request_timeout=KUBERNETES_TIMEOUT,
            )
        if input_name in storage_settings.FLOW_VOLUMES:
            if self._should_create_pvc(storage_settings.FLOW_VOLUMES[input_name]):
                claim_size = self._data_inputs_size(data)
                claim_name = unique_volume_name(
                    storage_settings.FLOW_VOLUMES[input_name]["config"]["name"],
                    data.id,
                )
                core_api.create_namespaced_persistent_volume_claim(
                    body=self._persistent_volume_claim(
                        claim_name,
                        claim_size,
                        storage_settings.FLOW_VOLUMES[input_name]["config"],
                    ),
                    namespace=self.kubernetes_namespace,
                    _request_timeout=KUBERNETES_TIMEOUT,
                )

        logger.debug(f"Creating namespaced job: {job_description}")
        batch_api.create_namespaced_job(
            body=job_description,
            namespace=self.kubernetes_namespace,
            _request_timeout=KUBERNETES_TIMEOUT,
        )
        end_time = time.time()
        logger.info(
            "It took {:.2f}s to send config to kubernetes".format(
                end_time - start_time
            )
        )
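
To make the resource arithmetic above concrete, the sketch below repeats the same computation outside the connector for a process that declares 4 cores and 32768 MB of memory. The buffer constant is an assumed illustrative value; in the example above it comes from KUBERNETES_MEMORY_HARD_LIMIT_BUFFER.

# Standalone sketch of the CPU/memory request and limit derivation used above.
KUBERNETES_MEMORY_HARD_LIMIT_BUFFER = 2000  # megabytes; assumed for this sketch


def compute_resources(cores, memory_mb):
    """Return (requests, limits) the way the connector derives them."""
    requests = {"cpu": cores * 0.8}  # request 20% less CPU than declared
    limits = {"cpu": cores + 1}  # allow bursting by one extra core

    # Memory: request 10% less, limit 10% more plus the hard-limit buffer,
    # then convert megabytes to bytes.
    requests["memory"] = 0.9 * memory_mb * 2**20
    limits["memory"] = (1.1 * memory_mb + KUBERNETES_MEMORY_HARD_LIMIT_BUFFER) * 2**20

    return requests, limits


requests, limits = compute_resources(cores=4, memory_mb=32768)
# requests["memory"] is about 28.8 GiB per process, so two such processes can
# still be scheduled onto a 64 GiB node, which is the point of requesting 10%
# less than the declared amount.
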
Example #3
        def process_data_object(data: Data):
            """Process a single data object."""
            # Lock for update. Note that we want this transaction to be as short as possible in
            # order to reduce contention and avoid deadlocks. This is why we do not lock all
            # resolving objects for update, but instead only lock one object at a time. This
            # allows managers running in parallel to process different objects.
            data = Data.objects.select_for_update().get(pk=data.pk)
            if data.status != Data.STATUS_RESOLVING:
                # The object might have already been processed while waiting for the lock to be
                # obtained. In this case, skip the object.
                return

            dep_status = dependency_status(data)

            if dep_status == Data.STATUS_ERROR:
                data.status = Data.STATUS_ERROR
                data.process_error.append(
                    "One or more inputs have status ERROR")
                data.process_rc = 1
                data.save()
                if hasattr(data, "worker"):
                    data.worker.status = Worker.STATUS_ERROR_PREPARING
                    data.worker.save(update_fields=["status"])

                return

            elif dep_status != Data.STATUS_DONE:
                return

            run_in_executor = False
            if data.process.run:
                try:
                    # Check if execution engine is sound and evaluate workflow.
                    execution_engine_name = data.process.run.get(
                        "language", None)
                    execution_engine = self.get_execution_engine(
                        execution_engine_name)
                    run_in_executor = execution_engine_name != "workflow"
                    if not run_in_executor:
                        execution_engine.evaluate(data)
                    else:
                        # Set allocated resources
                        resource_limits = data.get_resource_limits()
                        data.process_memory = resource_limits["memory"]
                        data.process_cores = resource_limits["cores"]

                except (ExecutionError, InvalidEngineError) as error:
                    data.status = Data.STATUS_ERROR
                    data.process_error.append(
                        "Error in process script: {}".format(error))
                    data.save()
                    if hasattr(data, "worker"):
                        data.worker.status = Worker.STATUS_ERROR_PREPARING
                        data.worker.save(update_fields=["status"])

                    return
            if data.status != Data.STATUS_DONE:
                # The data object may already be marked as done by the execution engine. In this
                # case we must not revert the status to STATUS_WAITING.
                data.status = Data.STATUS_WAITING
            data.save(render_name=True)

            # Actually run the object only if nothing went wrong within the
            # transaction and it was not already evaluated.
            if run_in_executor:
                transaction.on_commit(
                    # Make sure the closure gets the right values here, since
                    # they're changed in the loop.
                    lambda d=data: self._data_execute(d)
                )
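
The "lambda d=data:" idiom in the last lines matters because process_data_object is run for one object at a time inside a loop, and a plain closure would late-bind data to whatever the loop variable holds when the transaction finally commits. The snippet below is a minimal, self-contained illustration of that difference; a list of callbacks stands in for Django's transaction.on_commit and plain integers stand in for Data objects.

# Minimal sketch of the late-binding pitfall that the default argument avoids.
callbacks_late, callbacks_bound = [], []

for item in [1, 2, 3]:
    # Late binding: every callback sees the final value of "item".
    callbacks_late.append(lambda: item)
    # Default argument: each callback captures "item" at registration time,
    # which is what the dispatcher relies on above.
    callbacks_bound.append(lambda d=item: d)

print([cb() for cb in callbacks_late])   # [3, 3, 3]
print([cb() for cb in callbacks_bound])  # [1, 2, 3]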