Example #1
    def catch_api_proxy_sessions():

        json_obj = request.json

        json_obj["project_dir"] = get_project_directory(
            json_obj["project_uuid"], host_path=True
        )

        json_obj["pipeline_path"] = pipeline_uuid_to_path(
            json_obj["pipeline_uuid"],
            json_obj["project_uuid"],
        )

        json_obj["host_userdir"] = app.config["HOST_USER_DIR"]

        pipeline_json = get_pipeline_json(
            json_obj["pipeline_uuid"], json_obj["project_uuid"]
        )
        json_obj["settings"] = pipeline_json.get("settings", {})

        resp = requests.post(
            "http://" + app.config["ORCHEST_API_ADDRESS"] + "/api/sessions/",
            json=json_obj,
        )

        return resp.content, resp.status_code, resp.headers.items()
Example #2
    def pipelines_get(project_uuid):

        try:
            with TwoPhaseExecutor(db.session) as tpe:
                SyncProjectPipelinesDBState(tpe).transaction(project_uuid)
        except Exception as e:
            msg = (
                "Error during project pipelines synchronization of "
                f"{project_uuid}: {str(e)}."
            )
            return jsonify({"message": msg}), 500

        pipelines = Pipeline.query.filter(Pipeline.project_uuid == project_uuid).all()
        pipelines_augmented = []

        for pipeline in pipelines:

            pipeline_augmented = {
                "uuid": pipeline.uuid,
                "path": pipeline.path,
            }

            pipeline_json = get_pipeline_json(pipeline.uuid, pipeline.project_uuid)
            if pipeline_json is not None:
                pipeline_augmented["name"] = pipeline_json["name"]
            else:
                pipeline_augmented["name"] = "Warning: pipeline file was not found."

            pipelines_augmented.append(pipeline_augmented)

        json_string = json.dumps({"success": True, "result": pipelines_augmented})

        return json_string, 200, {"content-type": "application/json"}
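For illustration, the JSON body produced by pipelines_get has the following shape; the values below are placeholders, not taken from the source:

    # Sketch of the response body of pipelines_get (placeholder values only).
    example_response = {
        "success": True,
        "result": [
            {"uuid": "<pipeline-uuid>", "path": "<pipeline-path>", "name": "<pipeline-name>"},
        ],
    }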
Example #3
    def pipelines_get_all():

        pipelines = Pipeline.query.all()
        pipelines_augmented = []

        for pipeline in pipelines:

            pipeline_augmented = {
                "uuid": pipeline.uuid,
                "path": pipeline.path,
                "project_uuid": pipeline.project_uuid,
            }

            pipeline_json = get_pipeline_json(pipeline.uuid, pipeline.project_uuid)
            if pipeline_json is not None:
                pipeline_augmented["name"] = pipeline_json["name"]
            else:
                pipeline_augmented["name"] = "Warning: pipeline file was not found."

            pipelines_augmented.append(pipeline_augmented)

        json_string = json.dumps({
            "success": True,
            "result": pipelines_augmented
        })

        return json_string, 200, {"content-type": "application/json"}
Example #4
    def catch_api_proxy_jobs_post():

        json_obj = request.json

        pipeline_path = pipeline_uuid_to_path(
            json_obj["pipeline_uuid"], json_obj["project_uuid"])
        json_obj["pipeline_run_spec"]["run_config"] = {
            "host_user_dir": app.config["HOST_USER_DIR"],
            "project_dir": get_project_directory(
                json_obj["project_uuid"], host_path=True),
            "pipeline_path": pipeline_path,
        }

        json_obj["pipeline_definition"] = get_pipeline_json(
            json_obj["pipeline_uuid"], json_obj["project_uuid"])

        # Validate whether the pipeline contains environments
        # that do not exist in the project.
        project_environments = get_environments(json_obj["project_uuid"])
        project_environment_uuids = set(
            [environment.uuid for environment in project_environments])
        pipeline_environment_uuids = get_environments_from_pipeline_json(
            json_obj["pipeline_definition"])

        missing_environment_uuids = (pipeline_environment_uuids -
                                     project_environment_uuids)
        if len(missing_environment_uuids) > 0:
            missing_environment_uuids_str = ", ".join(missing_environment_uuids)
            return (
                jsonify({
                    "message": (
                        "The pipeline definition references environments "
                        "that do not exist in the project. "
                        "The following environments do not exist:"
                        f" [{missing_environment_uuids_str}].\n\n Please make sure all"
                        " pipeline steps are assigned an environment that exists"
                        " in the project."
                    )
                }),
                500,
            )

        # Jobs should always have eviction enabled.
        json_obj["pipeline_definition"]["settings"]["auto_eviction"] = True

        job_uuid = str(uuid.uuid4())
        json_obj["uuid"] = job_uuid
        create_job_directory(job_uuid, json_obj["pipeline_uuid"],
                             json_obj["project_uuid"])

        resp = requests.post(
            "http://" + app.config["ORCHEST_API_ADDRESS"] + "/api/jobs/",
            json=json_obj,
        )

        analytics.send_job_create(app, json_obj)
        return resp.content, resp.status_code, resp.headers.items()
Example #5
    def pipelines_json(project_uuid, pipeline_uuid):

        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if request.method == "POST":

            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                request.args.get("job_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # Side effect: for each Notebook in the pipeline.json, set
            # the correct kernel.
            pipeline_set_notebook_kernels(
                pipeline_json, pipeline_directory, project_uuid
            )

            with open(pipeline_json_path, "w") as json_file:
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

            # Analytics call.
            send_anonymized_pipeline_definition(app, pipeline_json)

            return jsonify({"message": "Successfully saved pipeline."})

        elif request.method == "GET":

            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify(
                        {
                            "success": False,
                            "reason": ".orchest file doesn't exist at location %s"
                            % pipeline_json_path,
                        }
                    ),
                    404,
                )
            else:

                pipeline_json = get_pipeline_json(pipeline_uuid, project_uuid)
                # json.dumps because the front end expects it as a
                # string.
                return jsonify(
                    {"success": True, "pipeline_json": json.dumps(pipeline_json)}
                )

            return ""
Example #6
    def catch_api_proxy_sessions_post():

        json_obj = request.json

        project_uuid = json_obj["project_uuid"]
        pipeline_uuid = json_obj["pipeline_uuid"]

        # Lock the project and pipeline row to avoid race conditions
        # with RenameProject and MovePipeline, which are locking for
        # update themselves.
        Project.query.with_for_update().filter(
            Project.uuid == project_uuid,
        ).one()
        Pipeline.query.with_for_update().filter(
            Pipeline.project_uuid == project_uuid,
            Pipeline.uuid == pipeline_uuid,
        ).one()

        pipeline_path = pipeline_uuid_to_path(
            json_obj["pipeline_uuid"],
            json_obj["project_uuid"],
        )

        project_dir = get_project_directory(json_obj["project_uuid"])

        services = get_pipeline_json(
            json_obj["pipeline_uuid"], json_obj["project_uuid"]
        ).get("services", {})

        session_config = {
            "project_uuid": project_uuid,
            "pipeline_uuid": pipeline_uuid,
            "pipeline_path": pipeline_path,
            "project_dir": project_dir,
            "userdir_pvc": app.config["USERDIR_PVC"],
            "services": services,
        }

        resp = requests.post(
            "http://" + app.config["ORCHEST_API_ADDRESS"] + "/api/sessions/",
            json=session_config,
        )

        analytics.send_event(
            app,
            analytics.Event.SESSION_START,
            {
                "project_uuid": project_uuid,
                "pipeline_uuid": pipeline_uuid,
                "services": services,
            },
        )
        return resp.content, resp.status_code, resp.headers.items()
Example #7
def create_job_spec(config) -> dict:
    """Returns a job spec based on the provided configuration.

    Args:
        config: Initial configuration with which the job spec should be
            built. project_uuid, pipeline_uuid, pipeline_run_spec,
            pipeline_name and name are required. Optional entries such
            as env_variables can be used to further customize the
            initial state of the newly created job.

    Returns:
        A job spec that can be POSTed to the orchest-api to create a
        job.
    """
    job_spec = copy.deepcopy(config)
    pipeline_path = pipeline_uuid_to_path(
        job_spec["pipeline_uuid"], job_spec["project_uuid"]
    )
    job_spec["pipeline_run_spec"]["run_config"] = {
        "userdir_pvc": current_app.config["USERDIR_PVC"],
        "project_dir": get_project_directory(job_spec["project_uuid"]),
        "pipeline_path": pipeline_path,
    }

    job_spec["pipeline_definition"] = get_pipeline_json(
        job_spec["pipeline_uuid"], job_spec["project_uuid"]
    )

    # Validate whether the pipeline contains environments
    # that do not exist in the project.
    project_environments = get_environments(job_spec["project_uuid"])
    project_environment_uuids = set(
        [environment.uuid for environment in project_environments]
    )
    pipeline_environment_uuids = get_environments_from_pipeline_json(
        job_spec["pipeline_definition"]
    )

    missing_environment_uuids = pipeline_environment_uuids - project_environment_uuids
    if len(missing_environment_uuids) > 0:
        raise error.EnvironmentsDoNotExist(missing_environment_uuids)

    # Jobs should always have eviction enabled.
    job_spec["pipeline_definition"]["settings"]["auto_eviction"] = True

    job_uuid = str(uuid.uuid4())
    job_spec["uuid"] = job_uuid
    create_job_directory(job_uuid, job_spec["pipeline_uuid"], job_spec["project_uuid"])
    return job_spec
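For reference, a minimal sketch of the configuration create_job_spec expects, based on the required keys named in its docstring; every value below is a placeholder, not something from the source:

    # Hypothetical input for create_job_spec (placeholder values only).
    example_config = {
        "project_uuid": "<project-uuid>",
        "pipeline_uuid": "<pipeline-uuid>",
        "pipeline_name": "<pipeline-name>",
        "name": "<job-name>",
        "pipeline_run_spec": {"run_type": "full", "uuids": []},
        # Optional: further customize the initial state of the job.
        "env_variables": {"MY_VAR": "value"},
    }
    # job_spec = create_job_spec(example_config) would then yield a spec
    # that can be POSTed to the orchest-api.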
Example #8
    def catch_api_proxy_jobs_post():

        json_obj = request.json

        pipeline_path = pipeline_uuid_to_path(
            json_obj["pipeline_uuid"], json_obj["project_uuid"]
        )
        json_obj["pipeline_run_spec"]["run_config"] = {
            "host_user_dir": app.config["HOST_USER_DIR"],
            "project_dir": get_project_directory(
                json_obj["project_uuid"], host_path=True
            ),
            "pipeline_path": pipeline_path,
        }

        json_obj["pipeline_definition"] = get_pipeline_json(
            json_obj["pipeline_uuid"], json_obj["project_uuid"]
        )

        # Jobs should always have eviction enabled.
        json_obj["pipeline_definition"]["settings"]["auto_eviction"] = True

        job_uuid = str(uuid.uuid4())
        json_obj["uuid"] = job_uuid
        create_job_directory(
            job_uuid, json_obj["pipeline_uuid"], json_obj["project_uuid"]
        )

        # Analytics call
        send_pipeline_run(
            app,
            f"{json_obj['project_uuid']}-{json_obj['pipeline_uuid']}",
            get_project_directory(json_obj["project_uuid"]),
            "noninteractive",
        )

        resp = requests.post(
            "http://" + app.config["ORCHEST_API_ADDRESS"] + "/api/jobs/",
            json=json_obj,
        )

        return resp.content, resp.status_code, resp.headers.items()
Example #9
    def create_project_file(project_uuid, pipeline_uuid, step_uuid):
        """Create project file in specified directory within project."""

        project_dir = get_project_directory(project_uuid)

        # The client sends a path that is absolute with respect to the
        # project root, hence the leading "/" character is removed.
        file_path = os.path.join(project_dir, request.json["file_path"][1:])

        if os.path.isfile(file_path):
            return jsonify({"message": "File already exists."}), 409
        try:
            create_pipeline_file(
                file_path,
                get_pipeline_json(pipeline_uuid, project_uuid),
                project_dir,
                project_uuid,
                step_uuid,
            )
            return jsonify({"message": "File created."})
        except IOError as e:
            app.logger.error("Could not create file at %s. Error: %s" % (file_path, e))
            # Return an explicit error response so the view does not
            # return None.
            return jsonify({"message": "Could not create file."}), 500
Example #10
def duplicate_job_spec(job_uuid: str) -> dict:
    """Returns a job spec to duplicate the provided job.

    Args:
        job_uuid: UUID of the job to duplicate.

    The project, pipeline, name, schedule, env variables and parameters
    of the "parent" job are inherited. Env variables are resolved
    according to the following expression:
        job_spec["env_variables"] = {
            **current_project_env_vars,
            **current_pipeline_env_vars,
            **parent_env_vars}

    Parameters are resolved by:
        - removing parameters that do not exist anymore.
        - adding new parameters, i.e. parameters that exist in the
          latest version of the pipeline but do not exist for the job
          that is being duplicated.
        - parameters that are not to be removed nor are new, i.e.
          parameters that exist both for the latest pipeline definition
          and the job use the values that were defined in the job that
          is being duplicated.
        - this is true for both the strategy json and the job
          parameters, where each job parameter effectively represents
          the parameterization of a run.
        - the job parameters are updated in a way that, from the client
          POV and given the logic in the job view, preserves the runs
          selection and their ordering, along with the constraint that
          every run parameterization should be unique.

    Returns:
        A job spec that can be POSTED to the orchest-api to create a job
        that is a duplicate of the job identified by the provided
        job_uuid.
    """
    resp = requests.get(
        f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/jobs/{job_uuid}'
    )
    if resp.status_code == 404:
        raise error.JobDoesNotExist()

    parent_job = resp.json()
    job_spec = {}
    job_spec["draft"] = True
    job_spec["name"] = "Duplicate of " + parent_job["name"]
    job_spec["cron_schedule"] = parent_job["schedule"]
    job_spec["project_uuid"] = parent_job["project_uuid"]
    job_spec["pipeline_uuid"] = parent_job["pipeline_uuid"]
    job_spec["pipeline_name"] = parent_job["pipeline_name"]
    job_spec["pipeline_run_spec"] = {"run_type": "full", "uuids": []}

    if (
        Project.query.filter_by(
            uuid=job_spec["project_uuid"],
        ).one_or_none()
        is None
    ):
        raise error.ProjectDoesNotExist()

    if (
        Pipeline.query.filter_by(
            uuid=job_spec["pipeline_uuid"],
            project_uuid=job_spec["project_uuid"],
        ).one_or_none()
        is None
    ):
        raise error.PipelineDoesNotExist()

    # Resolve env variables.
    parent_env_vars = parent_job["env_variables"]
    project_env_vars = requests.get(
        f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/projects/'
        f'{job_spec["project_uuid"]}'
    ).json()["env_variables"]
    pipeline_env_vars = requests.get(
        f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/'
        f'{job_spec["project_uuid"]}/{job_spec["pipeline_uuid"]}'
    ).json()["env_variables"]

    # This will merge the project and pipeline env variables then
    # add the parent job env vars, overriding existing env variables
    # and adding env variables that were not part of the project and
    # pipeline env vars. NOTE: we currently have no way to discern
    # old env variables that the job inherited from project/pipeline
    # env variables. If we could, old project/pipeline env vars that
    # do not exist anymore could be discarded.
    job_spec["env_variables"] = {
        **project_env_vars,
        **pipeline_env_vars,
        **parent_env_vars,
    }

    # Resolve parameters.
    latest_pipeline_def = get_pipeline_json(
        job_spec["pipeline_uuid"], job_spec["project_uuid"]
    )
    latest_pipeline_params = latest_pipeline_def["parameters"]
    latest_steps_params = {
        uuid: step["parameters"] for uuid, step in latest_pipeline_def["steps"].items()
    }
    st_json = parent_job["strategy_json"]
    st_json_pipe_params = st_json.get(_config.PIPELINE_PARAMETERS_RESERVED_KEY, {}).get(
        "parameters", {}
    )

    # Pipeline parameters that are new w.r.t. the inherited
    # strategy json.
    new_pipeline_parameters = {
        k: v for k, v in latest_pipeline_params.items() if k not in st_json_pipe_params
    }
    # Pipeline parameters that exist in the strategy json but that
    # have been removed from the latest version.
    removed_pipeline_parameters = {
        k for k in st_json_pipe_params if k not in latest_pipeline_params
    }
    # Steps parameters that are new w.r.t the inherited strategy
    # json.
    new_steps_parameters = defaultdict(dict)
    for latest_step, latest_step_params in latest_steps_params.items():
        if latest_step not in st_json:
            new_steps_parameters[latest_step] = copy.deepcopy(latest_step_params)
        else:
            st_json_step_parameters = st_json[latest_step]["parameters"]
            for k, v in latest_step_params.items():
                if k not in st_json_step_parameters:
                    new_steps_parameters[latest_step][k] = v
    # Steps parameters that exist in the strategy json but that have
    # been removed from the latest version.
    removed_steps_parameters = defaultdict(set)
    for st_json_step, st_json_data in st_json.items():
        if st_json_step == _config.PIPELINE_PARAMETERS_RESERVED_KEY:
            continue
        # Check which params of this step have been removed. Also
        # account if a step has been removed or does not exist
        # anymore.
        for param in st_json_data["parameters"]:
            if (
                st_json_step not in latest_steps_params
                or param not in latest_steps_params[st_json_step]
            ):
                removed_steps_parameters[st_json_step].add(param)

    # Given the current information, i.e. new and to delete
    # pipeline/steps parameters, we proceed to modify the strategy
    # json and job parameters.
    new_job_params = copy.deepcopy(parent_job["parameters"])
    new_st_json = copy.deepcopy(parent_job["strategy_json"])

    # Remove removed pipeline parameters.
    for removed_pipeline_param in removed_pipeline_parameters:
        del new_st_json[_config.PIPELINE_PARAMETERS_RESERVED_KEY]["parameters"][
            removed_pipeline_param
        ]
        for run_params in new_job_params:
            del run_params[_config.PIPELINE_PARAMETERS_RESERVED_KEY][
                removed_pipeline_param
            ]
    # Remove removed steps parameters.
    for step_with_removed_params, removed_params in removed_steps_parameters.items():
        for p in removed_params:
            del new_st_json[step_with_removed_params]["parameters"][p]
        for run_params in new_job_params:
            for p in removed_params:
                del run_params[step_with_removed_params][p]
    # Add new pipeline parameters.
    if new_pipeline_parameters:
        # In case there were no pipeline parameters before.
        if _config.PIPELINE_PARAMETERS_RESERVED_KEY not in new_st_json:
            d = dict()
            d["title"] = job_spec["pipeline_name"]
            d["key"] = _config.PIPELINE_PARAMETERS_RESERVED_KEY
            d["parameters"] = {}
            new_st_json[_config.PIPELINE_PARAMETERS_RESERVED_KEY] = d
        # Expected format for the strategy json.
        for k, v in new_pipeline_parameters.items():
            new_st_json[_config.PIPELINE_PARAMETERS_RESERVED_KEY]["parameters"][
                k
            ] = json.dumps([v])
        # Given that we use the default value specified in the
        # pipeline definition we don't have to combinatorially
        # generate new run_params.
        for run_params in new_job_params:
            if _config.PIPELINE_PARAMETERS_RESERVED_KEY not in run_params:
                run_params[_config.PIPELINE_PARAMETERS_RESERVED_KEY] = {}
            run_params[_config.PIPELINE_PARAMETERS_RESERVED_KEY].update(
                new_pipeline_parameters
            )
    # Add new steps parameters.
    for step_with_new_params, new_step_params in new_steps_parameters.items():
        if step_with_new_params not in new_st_json:
            d = dict()
            d["title"] = latest_pipeline_def["steps"][step_with_new_params]["title"]
            d["key"] = step_with_new_params
            d["parameters"] = {}
            new_st_json[step_with_new_params] = d
        # Expected format for the strategy json.
        for k, v in new_step_params.items():
            new_st_json[step_with_new_params]["parameters"][k] = json.dumps([v])
        # Given that we use the default value specified in the
        # pipeline definition we don't have to combinatorially
        # generate new run_params.
        for run_params in new_job_params:
            if step_with_new_params not in run_params:
                run_params[step_with_new_params] = {}
            run_params[step_with_new_params].update(new_step_params)

    # Empty pipeline parameters and step parameters must be dropped
    # from the strategy json and run params.
    for key in list(new_st_json):
        if not new_st_json[key]["parameters"]:
            del new_st_json[key]
    for run_params in new_job_params:
        for key in list(run_params):
            if not run_params[key]:
                del run_params[key]

    # Lastly, we have to merge run_params that are not unique
    # anymore, given that the deletion of some step or pipeline
    # parameters could have caused the loss of uniqueness. Use the
    # fact that starting with python 3.7 dictionaries preserve the
    # insertion order to preserve the order when merging.
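    # E.g. two run_params entries that became identical after parameter
    # removal, such as [{"<step-uuid>": {"a": 1}}, {"<step-uuid>": {"a": 1}}],
    # collapse into a single entry while the first-appearance order of the
    # remaining entries is preserved.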
    new_job_params = {json.dumps(k, sort_keys=True): None for k in new_job_params}
    new_job_params = [json.loads(k) for k in new_job_params]

    job_spec["parameters"] = new_job_params
    job_spec["strategy_json"] = new_st_json
    return create_job_spec(job_spec)
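As a small illustration of the env variable resolution described in the docstring of duplicate_job_spec: later entries in the merge expression win, so parent job values override pipeline values, which in turn override project values. The variable names and values below are made up:

    # Sketch of the merge precedence used for job_spec["env_variables"]
    # (made-up variable names and values).
    current_project_env_vars = {"A": "project", "B": "project"}
    current_pipeline_env_vars = {"B": "pipeline", "C": "pipeline"}
    parent_env_vars = {"C": "parent", "D": "parent"}

    merged = {
        **current_project_env_vars,
        **current_pipeline_env_vars,
        **parent_env_vars,
    }
    # merged == {"A": "project", "B": "pipeline", "C": "parent", "D": "parent"}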
Example #11
    def pipelines_json(project_uuid, pipeline_uuid):

        if request.method == "POST":

            pipeline_json_path = get_pipeline_path(
                pipeline_uuid,
                project_uuid,
                None,
                request.args.get("pipeline_run_uuid"),
            )

            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                None,
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # Normalize relative paths.
            for step in pipeline_json["steps"].values():

                is_project_file = is_valid_pipeline_relative_path(
                    project_uuid, pipeline_uuid, step["file_path"])

                is_data_file = is_valid_data_path(step["file_path"])

                if not (is_project_file or is_data_file):
                    raise app_error.OutOfAllowedDirectoryError(
                        "File is neither in the project, nor in the data directory."
                    )

                if not step["file_path"].startswith("/"):
                    step["file_path"] = normalize_project_relative_path(
                        step["file_path"])

            errors = check_pipeline_correctness(pipeline_json)
            if errors:
                reason = ", ".join(errors)
                msg = {"success": False, "reason": f"Invalid value: {reason}."}
                return jsonify(msg), 400

            # Side effect: for each Notebook in the pipeline.json, set
            # the correct kernel.
            try:
                pipeline_set_notebook_kernels(pipeline_json,
                                              pipeline_directory, project_uuid)
            except KeyError:
                msg = {
                    "success": False,
                    "reason": "Invalid Notebook metadata structure.",
                }
                return jsonify(msg), 400

            with open(pipeline_json_path, "r") as json_file:
                old_pipeline_json = json.load(json_file)

            # Save the pipeline JSON again to make sure its keys are
            # sorted.
            with open(pipeline_json_path, "w") as json_file:
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

            if old_pipeline_json["name"] != pipeline_json["name"]:
                resp = requests.put(
                    (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
                     f"/api/pipelines/{project_uuid}/{pipeline_uuid}"),
                    json={"name": pipeline_json["name"]},
                )
                if resp.status_code != 200:
                    return (
                        jsonify(
                            {"message": "Failed to PUT name to orchest-api."}),
                        resp.status_code,
                    )

            # Analytics call.
            analytics.send_event(
                app,
                analytics.Event.PIPELINE_SAVE,
                {"pipeline_definition": pipeline_json},
            )
            return jsonify({
                "success": True,
                "message": "Successfully saved pipeline."
            })

        elif request.method == "GET":
            pipeline_json_path = get_pipeline_path(
                pipeline_uuid,
                project_uuid,
                request.args.get("job_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify({
                        "success": False,
                        "reason": ".orchest file doesn't exist at location " +
                        pipeline_json_path,
                    }),
                    404,
                )
            else:
                pipeline_json = get_pipeline_json(pipeline_uuid, project_uuid)

                return jsonify({
                    "success": True,
                    "pipeline_json": json.dumps(pipeline_json)
                })