Example #1
    def _transaction(
        self,
        project_uuid: str,
        run_config: Dict[str, Any],
        pipeline: Pipeline,
    ):
        # specify the task_id beforehand to avoid race conditions
        # between the task and its presence in the db
        task_id = str(uuid.uuid4())

        # NOTE: we are setting the status of the run ourselves without
        # using the option of celery to get the status of tasks. This
        # way we do not have to configure a backend (where the default
        # of "rpc://" does not give the results we would want).
        run = {
            "uuid": task_id,
            "pipeline_uuid": pipeline.properties["uuid"],
            "project_uuid": project_uuid,
            "status": "PENDING",
        }
        db.session.add(models.InteractivePipelineRun(**run))
        # need to flush because otherwise the bulk insertion of pipeline
        # steps will lead to foreign key errors
        # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
        db.session.flush()

        # Set an initial value for the status of the pipeline steps that
        # will be run.
        step_uuids = [s.properties["uuid"] for s in pipeline.steps]

        pipeline_steps = []
        for step_uuid in step_uuids:
            pipeline_steps.append(
                models.PipelineRunStep(
                    **{
                        "run_uuid": task_id,
                        "step_uuid": step_uuid,
                        "status": "PENDING",
                    }
                )
            )
        db.session.bulk_save_objects(pipeline_steps)
        run["pipeline_steps"] = pipeline_steps

        self.collateral_kwargs["project_uuid"] = project_uuid
        self.collateral_kwargs["task_id"] = task_id
        self.collateral_kwargs["pipeline"] = pipeline
        self.collateral_kwargs["run_config"] = run_config
        self.collateral_kwargs["env_variables"] = get_proj_pip_env_variables(
            project_uuid, pipeline.properties["uuid"]
        )
        return run
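The transaction above only records the run in the database; the Celery task itself is launched in a separate collateral step, which is why `task_id` is generated up front and stashed in `self.collateral_kwargs`. Below is a minimal sketch of what such a collateral step might look like; the task name and exact kwargs are assumptions for illustration, not the project's confirmed API (`make_celery`, `send_task` and `forget` appear in Example #4).

    def _collateral(
        self,
        project_uuid: str,
        task_id: str,
        pipeline: Pipeline,
        run_config: Dict[str, Any],
        env_variables: Dict[str, Any],
    ):
        # Hypothetical sketch, not the project's confirmed code: launch
        # the Celery task with the task_id that was generated in
        # _transaction, so the DB row and the task share one identifier.
        celery = make_celery(current_app)
        res = celery.send_task(
            "app.core.tasks.run_pipeline",  # assumed task name
            kwargs={
                "pipeline_definition": pipeline.to_dict(),
                "project_uuid": project_uuid,
                "run_config": run_config,
                "env_variables": env_variables,
            },
            task_id=task_id,  # pre-set id avoids the race noted above
        )
        # The result is not used, so release any result-backend resources.
        res.forget()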
Example #2
    def _transaction(
        self,
        job: Dict[str, Any],
        pipeline_run_spec: Dict[str, Any],
        pipeline_definitions: List[Dict[str, Any]],
        pipeline_run_ids: List[str],
    ):

        db.session.add(models.Job(**job))
        # So that the job can be returned with all runs.
        job["pipeline_runs"] = []
        # To be later used by the collateral effect function.
        tasks_to_launch = []

        for pipeline_definition, id_ in zip(pipeline_definitions,
                                            pipeline_run_ids):
            # Note: the pipeline definition contains the parameters of
            # the specific run.
            pipeline_run_spec["pipeline_definition"] = pipeline_definition
            pipeline = construct_pipeline(**pipeline_run_spec)

            # Specify the task_id beforehand to avoid race conditions
            # between the task and its presence in the db.
            task_id = str(uuid.uuid4())
            tasks_to_launch.append((task_id, pipeline))

            non_interactive_run = {
                "job_uuid": job["job_uuid"],
                "run_uuid": task_id,
                "pipeline_run_id": id_,
                "pipeline_uuid": pipeline.properties["uuid"],
                "project_uuid": job["project_uuid"],
                "status": "PENDING",
            }
            db.session.add(
                models.NonInteractivePipelineRun(**non_interactive_run))
            # Need to flush because otherwise the bulk insertion of
            # pipeline steps will lead to foreign key errors.
            # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
            db.session.flush()

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline
            # steps that will be run.
            step_uuids = [s.properties["uuid"] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.PipelineRunStep(
                        **{
                            "run_uuid": task_id,
                            "step_uuid": step_uuid,
                            "status": "PENDING",
                        }))
            db.session.bulk_save_objects(pipeline_steps)

            non_interactive_run["pipeline_steps"] = pipeline_steps
            job["pipeline_runs"].append(non_interactive_run)

        self.collateral_kwargs["job"] = job
        self.collateral_kwargs["tasks_to_launch"] = tasks_to_launch
        self.collateral_kwargs["pipeline_run_spec"] = pipeline_run_spec

        return job
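The TODO inside the loop notes that this step-creation block is duplicated in `namespace_runs`. A sketch of how it could be factored out is shown below; the helper name is an assumption and the body only reuses constructs already present in the examples.

def create_pending_run_steps(pipeline, run_uuid):
    """Hypothetical helper (name and location are assumptions).

    Factors out the "create PENDING step rows" block that the TODO notes
    is duplicated between this module and `namespace_runs`.
    """
    pipeline_steps = [
        models.PipelineRunStep(
            run_uuid=run_uuid,
            step_uuid=step.properties["uuid"],
            status="PENDING",
        )
        for step in pipeline.steps
    ]
    # The parent run row must already be flushed, otherwise the bulk
    # insert hits the foreign key constraint (see the flush() above).
    db.session.bulk_save_objects(pipeline_steps)
    return pipeline_steps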
Example #3
    def _transaction(self, job_uuid: str):

        # with_entities is so that we do not retrieve the interactive
        # runs of the job, since we do not need those.
        job = (
            models.Job.query.with_entities(models.Job)
            # Use with_for_update so that the job entry will be locked
            # until commit, so that if, for whatever reason, the same
            # job is launched concurrently, the different launches will
            # actually be serialized, i.e. one has to wait for the
            # commit of the other, so that the launched runs will
            # correctly refer to a different total_scheduled_executions
            # number.
            # https://docs.sqlalchemy.org/en/13/orm/query.html#sqlalchemy.orm.query.Query.with_for_update
            # https://www.postgresql.org/docs/9.0/sql-select.html#SQL-FOR-UPDATE-SHARE
            .with_for_update().filter_by(uuid=job_uuid).one())
        # In case the job gets aborted while the scheduler attempts to
        # run it.
        if job.status == "ABORTED":
            self.collateral_kwargs["job"] = dict()
            self.collateral_kwargs["tasks_to_launch"] = []
            self.collateral_kwargs["run_config"] = dict()

        # The status of jobs that run once is initially set to PENDING,
        # thus we need to update that.
        if job.status == "PENDING":
            job.status = "STARTED"

        # To be later used by the collateral effect function.
        tasks_to_launch = []

        # The number of pipeline runs of a job, across all job runs. We
        # could use 'count' but 'max' is safer, if for any reason a
        # pipeline run is not there, e.g. if pipeline runs 0 and 2 are
        # there, but not 1, 'count' would keep returning 2, and no runs
        # could be launched anymore because of the (job_uuid,
        # pipeline_run_index) constraint.
        pipeline_run_index = (
            db.session.query(
                func.max(models.NonInteractivePipelineRun.pipeline_run_index)
            )
            .filter_by(job_uuid=job_uuid)
            .one()
        )[0]
        if pipeline_run_index is None:
            pipeline_run_index = 0
        else:
            pipeline_run_index += 1

        # run_index is the index of the run within the runs of this job
        # scheduling/execution.
        for run_index, run_parameters in enumerate(job.parameters):
            pipeline_def = copy.deepcopy(job.pipeline_definition)

            # Set the pipeline parameters:
            pipeline_def["parameters"] = run_parameters.get(
                _config.PIPELINE_PARAMETERS_RESERVED_KEY, {})

            # Set the steps parameters in the pipeline definition.
            for step_uuid, step_parameters in run_parameters.items():
                # One of the entries is not actually a step_uuid.
                if step_uuid != _config.PIPELINE_PARAMETERS_RESERVED_KEY:
                    pipeline_def["steps"][step_uuid][
                        "parameters"] = step_parameters

            # Instantiate a pipeline object given the specs, definition
            # and parameters.
            pipeline_run_spec = copy.deepcopy(job.pipeline_run_spec)
            pipeline_run_spec["pipeline_definition"] = pipeline_def
            pipeline = construct_pipeline(**pipeline_run_spec)

            # Specify the task_id beforehand to avoid race conditions
            # between the task and its presence in the db.
            task_id = str(uuid.uuid4())
            tasks_to_launch.append((task_id, pipeline))

            non_interactive_run = {
                "job_uuid": job.uuid,
                "uuid": task_id,
                "pipeline_uuid": job.pipeline_uuid,
                "project_uuid": job.project_uuid,
                "status": "PENDING",
                "parameters": run_parameters,
                "job_run_index": job.total_scheduled_executions,
                "job_run_pipeline_run_index": run_index,
                "pipeline_run_index": pipeline_run_index,
                "env_variables": job.env_variables,
            }
            pipeline_run_index += 1

            db.session.add(
                models.NonInteractivePipelineRun(**non_interactive_run))
            # Need to flush because otherwise the bulk insertion of
            # pipeline steps will lead to foreign key errors.
            # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
            db.session.flush()

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline
            # steps that will be run.
            step_uuids = [s.properties["uuid"] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.PipelineRunStep(
                        **{
                            "run_uuid": task_id,
                            "step_uuid": step_uuid,
                            "status": "PENDING",
                        }))
            db.session.bulk_save_objects(pipeline_steps)

        job.total_scheduled_executions += 1
        self.collateral_kwargs["job"] = job.as_dict()
        self.collateral_kwargs["tasks_to_launch"] = tasks_to_launch
Example #4
    def post(self):
        """Queues a new experiment."""
        # TODO: possibly use marshal() on the post_data. Note that we
        # have moved over to using flask_restx
        # https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.marshal
        #       to make sure the default values etc. are filled in.
        post_data = request.get_json()

        # TODO: maybe we can expect a datetime (in the schema) so we
        #       do not have to parse it here. Again note that we are now
        #       using flask_restx
        # https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.fields.DateTime
        scheduled_start = post_data["scheduled_start"]
        scheduled_start = datetime.fromisoformat(scheduled_start)

        experiment = {
            "experiment_uuid": post_data["experiment_uuid"],
            "project_uuid": post_data["project_uuid"],
            "pipeline_uuid": post_data["pipeline_uuid"],
            "scheduled_start": scheduled_start,
            "total_number_of_pipeline_runs": len(
                post_data["pipeline_definitions"]),
        }
        db.session.add(models.Experiment(**experiment))
        db.session.commit()

        pipeline_runs = []
        pipeline_run_spec = post_data["pipeline_run_spec"]
        env_uuid_docker_id_mappings = None
        # this way we write the entire exp to db, but avoid
        # launching any run (celery task) if we detected a problem
        experiment_creation_error_messages = []
        tasks_to_launch = []

        # TODO: This can be made more efficient, since the pipeline
        #       is the same for all pipeline runs. The only
        #       difference is the parameters. So all the jobs could
        #       be created in batch.
        for pipeline_definition, id_ in zip(post_data["pipeline_definitions"],
                                            post_data["pipeline_run_ids"]):
            pipeline_run_spec["pipeline_definition"] = pipeline_definition
            pipeline = construct_pipeline(**post_data["pipeline_run_spec"])

            # specify the task_id beforehand to avoid race conditions
            # between the task and its presence in the db
            task_id = str(uuid.uuid4())

            non_interactive_run = {
                "experiment_uuid": post_data["experiment_uuid"],
                "run_uuid": task_id,
                "pipeline_run_id": id_,
                "pipeline_uuid": pipeline.properties["uuid"],
                "project_uuid": post_data["project_uuid"],
                "status": "PENDING",
            }
            db.session.add(
                models.NonInteractivePipelineRun(**non_interactive_run))
            # need to flush because otherwise the bulk insertion of
            # pipeline steps will lead to foreign key errors
            # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
            db.session.flush()

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline
            # steps that will be run.
            step_uuids = [s.properties["uuid"] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.PipelineRunStep(
                        **{
                            "run_uuid": task_id,
                            "step_uuid": step_uuid,
                            "status": "PENDING",
                        }))
            db.session.bulk_save_objects(pipeline_steps)
            db.session.commit()

            non_interactive_run["pipeline_steps"] = pipeline_steps
            pipeline_runs.append(non_interactive_run)

            # get docker ids of images to use and make it so that the
            # images will not be deleted in case they become
            # outdated by an environment rebuild
            # compute it only once because this way we are guaranteed
            # that the mappings will be the same for all runs; a new
            # environment build completing while the different runs are
            # being submitted won't affect the experiment
            if env_uuid_docker_id_mappings is None:
                try:
                    env_uuid_docker_id_mappings = lock_environment_images_for_run(
                        task_id,
                        post_data["project_uuid"],
                        pipeline.get_environments(),
                    )
                except errors.ImageNotFound as e:
                    experiment_creation_error_messages.append(
                        f"Pipeline was referencing environments for "
                        f"which an image does not exist, {e}")
            else:
                image_mappings = [
                    models.PipelineRunImageMapping(
                        **{
                            "run_uuid": task_id,
                            "orchest_environment_uuid": env_uuid,
                            "docker_img_id": docker_id,
                        }) for env_uuid, docker_id in
                    env_uuid_docker_id_mappings.items()
                ]
                db.session.bulk_save_objects(image_mappings)
                db.session.commit()

            if len(experiment_creation_error_messages) == 0:
                # prepare the args for the task
                run_config = pipeline_run_spec["run_config"]
                run_config[
                    "env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings
                celery_job_kwargs = {
                    "experiment_uuid": post_data["experiment_uuid"],
                    "project_uuid": post_data["project_uuid"],
                    "pipeline_definition": pipeline.to_dict(),
                    "run_config": run_config,
                }

                # Due to circular imports we use the task name instead
                # of importing the function directly.
                tasks_to_launch.append({
                    "name":
                    "app.core.tasks.start_non_interactive_pipeline_run",
                    "eta": scheduled_start,
                    "kwargs": celery_job_kwargs,
                    "task_id": task_id,
                })

        experiment["pipeline_runs"] = pipeline_runs

        if len(experiment_creation_error_messages) == 0:
            # Create Celery object with the Flask context
            celery = make_celery(current_app)
            for task in tasks_to_launch:
                res = celery.send_task(**task)
                # NOTE: this is only relevant if a result backend is
                # configured. The task does not return anything, so we
                # can forget its result and make sure that the Celery
                # backend releases resources (for storing and
                # transmitting results) associated to the task.
                res.forget()

            return experiment, 201
        else:
            current_app.logger.error(
                "\n".join(experiment_creation_error_messages))

            # simple way to update both in memory objects
            # and the db while avoiding multiple update statements
            # (1 for each object)
            for pipeline_run in experiment["pipeline_runs"]:
                pipeline_run.status = "SUCCESS"
                for step in pipeline_run["pipeline_steps"]:
                    step.status = "FAILURE"

                models.PipelineRunStep.query.filter_by(
                    run_uuid=pipeline_run["run_uuid"]).update(
                        {"status": "FAILURE"})

            models.NonInteractivePipelineRun.query.filter_by(
                experiment_uuid=post_data["experiment_uuid"]).update(
                    {"status": "SUCCESS"})
            db.session.commit()

            return {
                "message": (
                    "Failed to create experiment because not all referenced "
                    "environments are available."
                )
            }, 500
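The TODOs at the top of `post()` point at flask_restx for validating the payload and parsing `scheduled_start` as a datetime instead of calling `fromisoformat` by hand. A hedged sketch of that direction follows; the namespace and model names are assumptions, not the project's actual schema.

from flask_restx import Namespace, fields

api = Namespace("experiments")

# Hypothetical request model; the field names follow the post_data keys
# used above, everything else is an assumption.
experiment_spec = api.model(
    "ExperimentSpec",
    {
        "experiment_uuid": fields.String(required=True),
        "project_uuid": fields.String(required=True),
        "pipeline_uuid": fields.String(required=True),
        "scheduled_start": fields.DateTime(dt_format="iso8601", required=True),
        "pipeline_definitions": fields.List(fields.Raw, required=True),
        "pipeline_run_ids": fields.List(fields.String, required=True),
        "pipeline_run_spec": fields.Raw(required=True),
    },
)

# Applied to the resource as @api.expect(experiment_spec, validate=True),
# so malformed payloads are rejected before reaching post().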