Example #1
def start_non_interactive_pipeline_run(
    self,
    job_uuid,
    project_uuid,
    pipeline_definition: PipelineDefinition,
    run_config: Dict[str, Union[str, Dict[str, str]]],
) -> str:
    """Starts a non-interactive pipeline run.

    It is a pipeline run that is part of a job.

    Args:
        job_uuid: UUID of the job.
        project_uuid: UUID of the project.
        pipeline_definition: A JSON description of the pipeline.
        run_config: Configuration of the run for the compute backend.
            Example: {
                'host_user_dir': '/home/../userdir',
                'project_dir': '/home/../pipelines/uuid',
                'env_uuid_docker_id_mappings': {
                    'b6527b0b-bfcc-4aff-91d1-37f9dfd5d8e8':
                        'sha256:61f82126945bb25dd85d6a5b122a1815df1c0c5f91621089cde0938be4f698d4'
                }
            }

    Returns:
        Status of the pipeline run. "FAILURE" or "SUCCESS".

    """
    pipeline_uuid = pipeline_definition["uuid"]

    job_dir = os.path.join("/userdir", "jobs", project_uuid, pipeline_uuid, job_uuid)
    snapshot_dir = os.path.join(job_dir, "snapshot")
    run_dir = os.path.join(job_dir, self.request.id)

    # TODO: It should not copy all directories, e.g. not "data".
    # Copy the contents of `snapshot_dir` to the new (not yet existing)
    # folder `run_dir`, which is created by the `cp -R` call below.
    os.system('cp -R "%s" "%s"' % (snapshot_dir, run_dir))

    # Update the `run_config` for the non-interactive pipeline run. The
    # pipeline run should use `run_dir` as its `project_dir`. Note that
    # the `project_dir` inside the `run_config` has to be the absolute
    # path w.r.t. the host, because it is used when mounting the
    # directory into a step's container through `docker.sock`.
    host_base_user_dir = os.path.split(run_config["host_user_dir"])[0]

    # To join the paths, `run_dir` cannot start with `/userdir/...`; it
    # has to start with `userdir/...`.
    run_config["project_dir"] = os.path.join(host_base_user_dir, run_dir[1:])
    run_config["run_endpoint"] = f"jobs/{job_uuid}"
    run_config["pipeline_uuid"] = pipeline_uuid
    run_config["project_uuid"] = project_uuid

    # Overwrite the `pipeline.json` that was copied from the snapshot
    # with the new `pipeline.json` that contains the new parameters for
    # every step.
    pipeline_json = os.path.join(run_dir, run_config["pipeline_path"])
    with open(pipeline_json, "w") as f:
        json.dump(pipeline_definition, f, indent=4, sort_keys=True)

    with launch_noninteractive_session(
        docker_client,
        self.request.id,
        project_uuid,
        run_config["pipeline_path"],
        run_config["project_dir"],
    ):
        status = run_pipeline(
            pipeline_definition, project_uuid, run_config, task_id=self.request.id
        )

    return status
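The re-rooting of `run_dir` onto the host in the snippet above is easy to misread, so the following is a minimal, self-contained sketch of the same path arithmetic. All concrete paths are hypothetical, chosen only to mirror the docstring example:

import os

# Hypothetical values mirroring the docstring example above.
run_config = {"host_user_dir": "/home/user/userdir"}
run_dir = "/userdir/jobs/project-uuid/pipeline-uuid/job-uuid/task-uuid"

# os.path.split drops the last path component, leaving the host directory
# that contains the userdir.
host_base_user_dir = os.path.split(run_config["host_user_dir"])[0]
assert host_base_user_dir == "/home/user"

# Stripping the leading "/" makes run_dir a relative path, so os.path.join
# re-roots it under the host base dir instead of discarding the prefix.
project_dir = os.path.join(host_base_user_dir, run_dir[1:])
assert project_dir == (
    "/home/user/userdir/jobs/project-uuid/pipeline-uuid/job-uuid/task-uuid"
)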
Example #2
def start_non_interactive_pipeline_run(
    self,
    job_uuid,
    project_uuid,
    pipeline_definition: PipelineDefinition,
    run_config: Dict[str, Union[str, Dict[str, str]]],
) -> str:
    """Starts a non-interactive pipeline run.

    It is a pipeline run that is part of a job.

    Args:
        job_uuid: UUID of the job.
        project_uuid: UUID of the project.
        pipeline_definition: A JSON description of the pipeline.
        run_config: Configuration of the run for the compute backend.
            Example: {
                'userdir_pvc': 'userdir-pvc',
                'project_dir': 'pipelines/uuid',
                'env_uuid_to_image': {
                    'b6527b0b-bfcc-4aff-91d1-37f9dfd5d8e8':
                        'sha256:61f82126945bb25dd85d6a5b122a1815df1c0c5f91621089cde0938be4f698d4'
                }
            }

    Returns:
        Status of the pipeline run. "FAILURE" or "SUCCESS".

    """
    pipeline_uuid = pipeline_definition["uuid"]

    job_dir = os.path.join("/userdir", "jobs", project_uuid, pipeline_uuid, job_uuid)
    snapshot_dir = os.path.join(job_dir, "snapshot")
    run_dir = os.path.join(job_dir, self.request.id)

    # Copy the contents of `snapshot_dir` to the new (not yet existing)
    # folder `run_dir`. There is no need to use use_gitignore here since
    # the snapshot was already copied with use_gitignore=True.
    copytree(snapshot_dir, run_dir, use_gitignore=False)

    # Update the `run_config` for the non-interactive pipeline run. The
    # pipeline run should use `run_dir` as its `project_dir`. Note that
    # the `project_dir` inside the `run_config` has to be relative to
    # `userdir_pvc` because k8s mounts it as a subpath of `userdir_pvc`.
    userdir_pvc = run_config["userdir_pvc"]

    # For non-interactive runs the session uuid is equal to the task
    # uuid, which in turn is the pipeline run uuid.
    session_uuid = self.request.id
    run_config["session_uuid"] = session_uuid
    run_config["session_type"] = "noninteractive"
    run_config["pipeline_uuid"] = pipeline_uuid
    run_config["project_uuid"] = project_uuid
    run_config["project_dir"] = run_dir
    run_config["run_endpoint"] = f"jobs/{job_uuid}"

    # Overwrite the `pipeline.json` that was copied from the snapshot
    # with the new `pipeline.json` that contains the new parameters for
    # every step.
    pipeline_json = os.path.join(run_dir, run_config["pipeline_path"])
    with open(pipeline_json, "w") as f:
        json.dump(pipeline_definition, f, indent=4, sort_keys=True)

    # Note that run_config contains user_env_variables, which is of
    # interest to the session_config.
    session_config = copy.deepcopy(run_config)
    session_config.pop("env_uuid_to_image")
    session_config.pop("run_endpoint")
    session_config["userdir_pvc"] = userdir_pvc
    session_config["services"] = pipeline_definition.get("services", {})
    session_config["env_uuid_to_image"] = run_config["env_uuid_to_image"]

    with launch_noninteractive_session(
        session_uuid,
        session_config,
        lambda: AbortableAsyncResult(session_uuid).is_aborted(),
    ):
        status = run_pipeline(
            pipeline_definition,
            run_config,
            session_uuid,
            task_id=self.request.id,
        )

    return status
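To make the deepcopy-and-pop step above easier to follow, here is a minimal, self-contained sketch of how `session_config` is derived from `run_config`. Only the key names come from the example above; every value is hypothetical:

import copy

# Hypothetical run_config with the keys used in the example above.
run_config = {
    "userdir_pvc": "userdir-pvc",
    "project_dir": "/userdir/jobs/project-uuid/pipeline-uuid/job-uuid/task-uuid",
    "pipeline_path": "pipeline.json",
    "env_uuid_to_image": {"env-uuid": "image-digest"},
    "run_endpoint": "jobs/job-uuid",
    "user_env_variables": {"MY_VAR": "value"},
}
pipeline_definition = {"uuid": "pipeline-uuid", "services": {}}

# Deep copy so that mutating session_config cannot affect run_config.
session_config = copy.deepcopy(run_config)

# run_endpoint only matters to the pipeline run itself, not to the session.
session_config.pop("run_endpoint")

# In the example above env_uuid_to_image is popped and then re-added from
# run_config; the net effect is that it is carried over unchanged.
session_config["services"] = pipeline_definition.get("services", {})

assert "run_endpoint" not in session_config
assert "run_endpoint" in run_config
assert session_config["env_uuid_to_image"] == run_config["env_uuid_to_image"]
assert session_config["user_env_variables"] == {"MY_VAR": "value"}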