def get_env_uuids_to_docker_id_mappings(
    project_uuid: str, env_uuids: Set[str]
) -> Dict[str, str]:
    """Resolve the docker id of the current image of each environment.

    Args:
        project_uuid: UUID of the project that owns the environments.
        env_uuids: UUIDs of the environments to resolve.

    Returns:
        A dict such that ``result[env_uuid] == docker_id``.

    Raises:
        errors.ImageNotFound: If the lookup returned ``None`` for at
            least one environment. The message lists every such
            environment, one per line.
    """
    docker_id_by_env = {}
    for env_id in env_uuids:
        image_name = _config.ENVIRONMENT_IMAGE_NAME.format(
            project_uuid=project_uuid, environment_uuid=env_id
        )
        docker_id_by_env[env_id] = get_environment_image_docker_id(image_name)

    # Gather every failure before raising so that a single error reports
    # all the environments lacking an image.
    not_found = []
    for env_id, docker_id in docker_id_by_env.items():
        if docker_id is None:
            not_found.append(
                str(errors.ImageNotFound(f"{env_id} has no docker image"))
            )

    if not_found:
        raise errors.ImageNotFound("\n".join(not_found))

    return docker_id_by_env
def _collateral(
    self,
    project_uuid: str,
    task_id: str,
    pipeline: Pipeline,
    run_config: Dict[str, Any],
    env_variables: Dict[str, Any],
    **kwargs,
):
    """Lock the images needed by the run and submit the run to Celery.

    Args:
        project_uuid: UUID of the project the pipeline belongs to.
        task_id: Id given to the Celery task; also passed to
            ``lock_environment_images_for_run``.
        pipeline: Pipeline to run.
        run_config: Run configuration. It is updated in place with the
            ``env_uuid_docker_id_mappings`` and ``user_env_variables``
            keys before being sent along with the task.
        env_variables: Stored in ``run_config`` under
            ``user_env_variables``.
        **kwargs: Accepted and ignored.

    Raises:
        errors.ImageNotFound: If locking the images raised
            ``errors.ImageNotFound``. It is re-raised with a user facing
            message and the original error attached as its cause.
    """
    # Get docker ids of images to use and make it so that the images
    # will not be deleted in case they become outdated by an
    # environment rebuild.
    try:
        env_uuid_docker_id_mappings = lock_environment_images_for_run(
            task_id,
            project_uuid,
            pipeline.get_environments(),
        )
    except errors.ImageNotFound as e:
        msg = (
            "Pipeline references environments that do not exist in the"
            f" project, the following environments do not exist: [{e}].\n\n"
            "Please make sure all pipeline steps are assigned an"
            " environment that exists in the project."
        )
        # Keep the original error as the explicit cause so the traceback
        # shows where the lookup failed.
        raise errors.ImageNotFound(msg) from e

    # Create Celery object with the Flask context and construct the
    # kwargs for the job.
    celery = make_celery(current_app)
    run_config["env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings
    run_config["user_env_variables"] = env_variables
    celery_job_kwargs = {
        "pipeline_definition": pipeline.to_dict(),
        "project_uuid": project_uuid,
        "run_config": run_config,
    }

    # Start the run as a background task on Celery. Due to circular
    # imports we send the task by name instead of importing the
    # function directly.
    res = celery.send_task(
        "app.core.tasks.run_pipeline",
        kwargs=celery_job_kwargs,
        task_id=task_id,
    )

    # NOTE: this only matters if a result backend is configured. The
    # task does not return anything, therefore we can forget its result
    # and make sure that the Celery backend releases the resources (for
    # storing and transmitting results) associated with the task.
    res.forget()
def test_docker_image_exists_with_image_not_found_error(mocker, mock_client):
    """Expect a falsy result when ``images.get`` raises ``ImageNotFound``."""
    tag = "gcr.io/sigint/test-image-name"

    # Wire a mocked ``images`` attribute onto the client fixture.
    images = mocker.Mock()
    images.get = mocker.Mock()
    mock_client.images = mocker.PropertyMock(return_value=images)

    # Make the image lookup fail with docker's "image not found" error.
    mock_client.images.get.side_effect = docker_errors.ImageNotFound(
        "pew", "pew"
    )

    assert not docker_utils.docker_image_exists(tag, mock_client)
def _collateral(
    self,
    job: Dict[str, Any],
    pipeline_run_spec: Dict[str, Any],
    tasks_to_launch: Tuple[str, Pipeline],
):
    """Lock the images for the job's runs and submit every run to Celery.

    Args:
        job: Job description; the ``project_uuid``, ``job_uuid`` and
            ``scheduled_start`` keys are read.
        pipeline_run_spec: Run specification; its ``run_config`` entry
            is updated in place with the ``env_uuid_docker_id_mappings``
            key and sent along with every task.
        tasks_to_launch: ``(task_id, pipeline)`` pairs, one per run. It
            is indexed and iterated as a sequence of pairs. Nothing is
            done if it is empty.

    Raises:
        errors.ImageNotFound: If locking the images raised
            ``errors.ImageNotFound``. It is re-raised with a more
            descriptive message and the original error as its cause.
    """
    # Safety check in case the job has no runs.
    if not tasks_to_launch:
        return

    # Get docker ids of images to use and make it so that the images
    # will not be deleted in case they become outdated by an
    # environment rebuild. Compute it only once because this way we are
    # guaranteed that the mappings will be the same for all runs, having
    # a new environment build terminate while submitting the different
    # runs won't affect the job.
    first_task_id, first_pipeline = tasks_to_launch[0]
    try:
        env_uuid_docker_id_mappings = lock_environment_images_for_run(
            first_task_id,
            job["project_uuid"],
            first_pipeline.get_environments(),
        )
    except errors.ImageNotFound as e:
        raise errors.ImageNotFound(
            "Pipeline was referencing environments for "
            f"which an image does not exist, {e}"
        ) from e

    # Give the remaining runs the same mappings as the first one.
    # NOTE(review): presumably the mappings of the first run are stored
    # by lock_environment_images_for_run itself -- confirm.
    for task_id, _ in tasks_to_launch[1:]:
        image_mappings = [
            models.PipelineRunImageMapping(
                **{
                    "run_uuid": task_id,
                    "orchest_environment_uuid": env_uuid,
                    "docker_img_id": docker_id,
                }
            )
            for env_uuid, docker_id in env_uuid_docker_id_mappings.items()
        ]
        db.session.bulk_save_objects(image_mappings)
    db.session.commit()

    # Launch each task through celery.
    celery = make_celery(current_app)

    # The run config is shared by all runs, so it is looked up and
    # updated once instead of on every iteration.
    run_config = pipeline_run_spec["run_config"]
    run_config["env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings

    for task_id, pipeline in tasks_to_launch:
        celery_job_kwargs = {
            "job_uuid": job["job_uuid"],
            "project_uuid": job["project_uuid"],
            "pipeline_definition": pipeline.to_dict(),
            "run_config": run_config,
        }

        # Due to circular imports we use the task name instead of
        # importing the function directly.
        task_args = {
            "name": "app.core.tasks.start_non_interactive_pipeline_run",
            "eta": job["scheduled_start"],
            "kwargs": celery_job_kwargs,
            "task_id": task_id,
        }
        res = celery.send_task(**task_args)

        # NOTE: this only matters if a result backend is configured.
        # The task does not return anything, therefore we can forget
        # its result and make sure that the Celery backend releases the
        # resources (for storing and transmitting results) associated
        # with the task.
        res.forget()
def _transaction(
    self,
    job_uuid: str,
    cron_schedule: str,
    parameters: Dict[str, Any],
    env_variables: Dict[str, str],
    next_scheduled_time: str,
    strategy_json: Dict[str, Any],
    confirm_draft,
):
    """Update a job and, if requested, move it out of the draft state.

    Every argument except ``job_uuid`` and ``confirm_draft`` is optional
    in the sense that ``None`` leaves the corresponding field untouched.

    Args:
        job_uuid: UUID of the job to update. The job is queried using
            ``with_for_update()``.
        cron_schedule: New cron schedule. Must be valid according to
            ``croniter.is_valid``; also resets ``next_scheduled_time``
            to the next occurrence from now (UTC).
        parameters: New job parameters.
        env_variables: New job environment variables.
        next_scheduled_time: ISO formatted datetime at which a draft,
            non cron job should run.
        strategy_json: New strategy json.
        confirm_draft: If truthy, the draft is confirmed: the job
            becomes ``PENDING`` (no schedule) or ``STARTED`` (cron job).

    Raises:
        ValueError: If an update is not allowed given the job status or
            schedule, or if the cron schedule is invalid.
        errors.ImageNotFound: If, while confirming the draft, the
            pipeline references environments for which
            ``get_env_uuids_missing_image`` reports a missing image.
    """
    job = models.Job.query.with_for_update().filter_by(uuid=job_uuid).one()

    # NOTE: the checks below read ``job.schedule`` as they go, so a
    # cron schedule set by this very call is taken into account by the
    # checks that follow it.
    if cron_schedule is not None:
        if job.schedule is None and job.status != "DRAFT":
            raise ValueError(
                "Failed update operation. Cannot set the schedule of a "
                "job which is not a cron job already."
            )

        if not croniter.is_valid(cron_schedule):
            raise ValueError(
                f"Failed update operation. Invalid cron schedule: {cron_schedule}"
            )

        # Check when is the next time the job should be scheduled
        # starting from now.
        job.schedule = cron_schedule
        job.next_scheduled_time = croniter(
            cron_schedule, datetime.now(timezone.utc)
        ).get_next(datetime)

    if parameters is not None:
        if job.schedule is None and job.status != "DRAFT":
            raise ValueError(
                "Failed update operation. Cannot update the parameters of "
                "a job which is not a cron job."
            )
        job.parameters = parameters

    if env_variables is not None:
        if job.schedule is None and job.status != "DRAFT":
            raise ValueError(
                "Failed update operation. Cannot update the env variables of "
                "a job which is not a cron job."
            )
        job.env_variables = env_variables

    if next_scheduled_time is not None:
        if job.status != "DRAFT":
            raise ValueError(
                "Failed update operation. Cannot set the next scheduled "
                "time of a job which is not a draft."
            )
        if job.schedule is not None:
            raise ValueError(
                "Failed update operation. Cannot set the next scheduled "
                "time of a cron job."
            )
        job.next_scheduled_time = datetime.fromisoformat(next_scheduled_time)

    if strategy_json is not None:
        if job.schedule is None and job.status != "DRAFT":
            # The trailing space after "json" keeps the two literals
            # from being glued together into "jsonof".
            raise ValueError(
                "Failed update operation. Cannot set the strategy json "
                "of a job which is not a draft nor a cron job."
            )
        job.strategy_json = strategy_json

    if confirm_draft:
        if job.status != "DRAFT":
            raise ValueError("Failed update operation. The job is not a draft.")

        # Make sure all environments still exist, that is, the
        # pipeline is not referring non-existing environments.
        pipeline_def = job.pipeline_definition
        environment_uuids = {
            step["environment"] for step in pipeline_def["steps"].values()
        }
        env_uuids_missing_image = get_env_uuids_missing_image(
            job.project_uuid, environment_uuids
        )
        if env_uuids_missing_image:
            missing = ", ".join(env_uuids_missing_image)
            msg = (
                "Pipeline references environments that do not exist in the"
                " project. The following environments do not exist:"
                f" [{missing}].\n\n Please make sure all"
                " pipeline steps are assigned an environment that exists"
                " in the project."
            )
            raise errors.ImageNotFound(msg)

        if job.schedule is None:
            job.status = "PENDING"

            # One time job that needs to run right now. The
            # scheduler will not pick it up because it does not have
            # a next_scheduled_time.
            if job.next_scheduled_time is None:
                job.last_scheduled_time = datetime.now(timezone.utc)
                RunJob(self.tpe).transaction(job.uuid)
            else:
                job.last_scheduled_time = job.next_scheduled_time

            # One time jobs that are set to run at a given date will
            # now be picked up by the scheduler, since they are not
            # a draft anymore.

        # Cron jobs are considered STARTED the moment the scheduler
        # can decide or not about running them.
        else:
            job.last_scheduled_time = job.next_scheduled_time
            job.status = "STARTED"