def launch_run(self, instance, run, external_pipeline):
    """Launch a run for a pipeline loaded from a gRPC server."""
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    repository_location_handle = external_pipeline.repository_handle.repository_location_handle

    check.inst(
        repository_location_handle,
        GRPC_REPOSITORY_LOCATION_HANDLE_TYPES,
        "GrpcRunLauncher: Can't launch runs for pipeline not loaded from a GRPC server",
    )

    self._instance.add_run_tags(
        run.run_id,
        {
            GRPC_INFO_TAG: seven.json.dumps(
                merge_dicts(
                    {"host": repository_location_handle.host},
                    {"port": repository_location_handle.port}
                    if repository_location_handle.port
                    else {"socket": repository_location_handle.socket},
                )
            )
        },
    )

    res = repository_location_handle.client.start_run(
        ExecuteRunArgs(
            pipeline_origin=external_pipeline.get_origin(),
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )
    )

    if not res.success:
        raise DagsterLaunchFailedError(
            res.message, serializable_error_info=res.serializable_error_info
        )

    self._run_id_to_repository_location_handle_cache[run.run_id] = repository_location_handle

    return run
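
# A minimal sketch (hypothetical host/port/socket values) of the GRPC_INFO_TAG payload
# attached to the run above: merge_dicts produces either a host/port pair or a
# host/socket pair, depending on how the gRPC server is reachable.
port_based_tag = seven.json.dumps({"host": "localhost", "port": 4266})
socket_based_tag = seven.json.dumps({"host": "localhost", "socket": "/tmp/dagster-grpc.sock"})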

def launch_run(self, instance, run, external_pipeline):
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    docker_image = external_pipeline.get_python_origin().repository_origin.container_image

    if not docker_image:
        docker_image = self._image

    if not docker_image:
        raise Exception("No docker image specified by the instance config or repository")

    try:
        # validate that the docker image name is valid
        reference.Reference.parse(docker_image)
    except Exception as e:
        raise Exception(
            "Docker image name {docker_image} is not correctly formatted".format(
                docker_image=docker_image
            )
        ) from e

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=external_pipeline.get_python_origin(),
            pipeline_run_id=run.run_id,
            instance_ref=instance.get_ref(),
        )
    )

    command = "dagster api execute_run_with_structured_logs {}".format(json.dumps(input_json))

    docker_env = (
        {env_name: os.getenv(env_name) for env_name in self._env_vars} if self._env_vars else {}
    )

    client = self._get_client()

    try:
        container = client.containers.create(
            image=docker_image,
            command=command,
            detach=True,
            environment=docker_env,
            network=self._network,
        )
    except docker.errors.ImageNotFound:
        client.images.pull(docker_image)
        container = client.containers.create(
            image=docker_image,
            command=command,
            detach=True,
            environment=docker_env,
            network=self._network,
        )

    self._instance.report_engine_event(
        message="Launching run in a new container {container_id} with image {docker_image}".format(
            container_id=container.id,
            docker_image=docker_image,
        ),
        pipeline_run=run,
        cls=self.__class__,
    )

    self._instance.add_run_tags(
        run.run_id,
        {DOCKER_CONTAINER_ID_TAG: container.id, DOCKER_IMAGE_TAG: docker_image},
    )

    container.start()

    return run
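
# A minimal standalone sketch of the image-resolution order used above: the image pinned
# on the repository origin wins, then the launcher's configured default; missing both is
# an error. The names resolve_docker_image and default_image are hypothetical and only
# illustrate the fallback logic.
def resolve_docker_image(origin_image, default_image):
    docker_image = origin_image or default_image
    if not docker_image:
        raise Exception("No docker image specified by the instance config or repository")
    return docker_image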

def launch_run(self, context: LaunchRunContext) -> None:
    """
    Launch a run in an ECS task.

    Currently, Fargate is the only supported launchType and awsvpc is the only
    supported networkMode. These are the defaults that are set up by
    docker-compose when you use the Dagster ECS reference deployment.
    """
    run = context.pipeline_run
    metadata = self._task_metadata()
    pipeline_origin = context.pipeline_code_origin
    image = pipeline_origin.repository_origin.container_image
    task_definition = self._task_definition(metadata, image)["family"]

    args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=self._instance.get_ref(),
    )
    command = args.get_command_args()

    # Set cpu or memory overrides
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
    overrides = {}
    tags = self._get_run_tags(run.run_id)
    if tags.cpu:
        overrides["cpu"] = tags.cpu
    if tags.memory:
        overrides["memory"] = tags.memory

    # Run a task using the same network configuration as this process's task.
    response = self.ecs.run_task(
        taskDefinition=task_definition,
        cluster=metadata.cluster,
        overrides={
            "containerOverrides": [{"name": self.container_name, "command": command}],
            **overrides,
        },
        networkConfiguration={
            "awsvpcConfiguration": {
                "subnets": metadata.subnets,
                "assignPublicIp": metadata.assign_public_ip,
                "securityGroups": metadata.security_groups,
            }
        },
        launchType="FARGATE",
    )

    arn = response["tasks"][0]["taskArn"]
    self._set_run_tags(run.run_id, task_arn=arn)
    self._set_ecs_tags(run.run_id, task_arn=arn)
    self._instance.report_engine_event(
        message="Launching run in ECS task",
        pipeline_run=run,
        engine_event_data=EngineEventData(
            [
                EventMetadataEntry.text(arn, "ECS Task ARN"),
                EventMetadataEntry.text(metadata.cluster, "ECS Cluster"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
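
# A minimal sketch (hypothetical container name and tag values) of the overrides payload
# built above: the run worker command rides inside containerOverrides, while cpu/memory
# values taken from run tags are spread at the task level alongside it.
example_overrides = {
    "containerOverrides": [
        {"name": "run", "command": ["<ExecuteRunArgs command args>"]},
    ],
    "cpu": "256",
    "memory": "512",
}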

def test_cancel_run():
    with temp_instance() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )

        with GrpcServerProcess(
            loadable_target_origin, max_workers=10
        ).create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={'solids': {'streamer': {'config': {'length': 20}}}},
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name='streaming_pipeline',
                    repository_origin=RepositoryGrpcServerOrigin(
                        host='localhost',
                        socket=api_client.socket,
                        port=api_client.port,
                        repository_name='test_repository',
                    ),
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()

            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=pipeline_run.run_id)
            )
            assert res.success is True

            poll_for_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            assert (
                len(
                    [
                        ev
                        for ev in logs
                        if ev.dagster_event.event_type_value == 'STEP_MATERIALIZATION'
                    ]
                )
                < 20
            )  # soft termination

            assert [
                ev for ev in logs if ev.dagster_event.event_type_value == 'STEP_FAILURE'
            ]

def launch_run(self, context: LaunchRunContext) -> None:
    """
    Launch a run in an ECS task.

    Currently, Fargate is the only supported launchType and awsvpc is the only
    supported networkMode. These are the defaults that are set up by
    docker-compose when you use the Dagster ECS reference deployment.
    """
    run = context.pipeline_run
    family = sanitize_family(
        run.external_pipeline_origin.external_repository_origin.repository_location_origin.location_name
    )
    container_context = EcsContainerContext.create_for_run(run, self)

    metadata = self._task_metadata()
    pipeline_origin = context.pipeline_code_origin
    image = pipeline_origin.repository_origin.container_image
    task_definition = self._task_definition(family, metadata, image, container_context)[
        "family"
    ]

    args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=self._instance.get_ref(),
    )
    command = args.get_command_args()

    # Set cpu or memory overrides
    # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
    cpu_and_memory_overrides = {}
    tags = self._get_run_tags(run.run_id)
    if tags.cpu:
        cpu_and_memory_overrides["cpu"] = tags.cpu
    if tags.memory:
        cpu_and_memory_overrides["memory"] = tags.memory

    # Run a task using the same network configuration as this process's task.
    response = self.ecs.run_task(
        taskDefinition=task_definition,
        cluster=metadata.cluster,
        overrides={
            "containerOverrides": [
                {
                    "name": self.container_name,
                    "command": command,
                    # containerOverrides expects cpu/memory as integers
                    **{k: int(v) for k, v in cpu_and_memory_overrides.items()},
                }
            ],
            # taskOverrides expects cpu/memory as strings
            **cpu_and_memory_overrides,
        },
        networkConfiguration={
            "awsvpcConfiguration": {
                "subnets": metadata.subnets,
                "assignPublicIp": metadata.assign_public_ip,
                "securityGroups": metadata.security_groups,
            }
        },
        launchType="FARGATE",
    )

    tasks = response["tasks"]

    if not tasks:
        failures = response["failures"]
        exceptions = []
        for failure in failures:
            arn = failure.get("arn")
            reason = failure.get("reason")
            detail = failure.get("detail")
            exceptions.append(Exception(f"Task {arn} failed because {reason}: {detail}"))
        raise Exception(exceptions)

    arn = tasks[0]["taskArn"]
    self._set_run_tags(run.run_id, task_arn=arn)
    self._set_ecs_tags(run.run_id, task_arn=arn)
    self._instance.report_engine_event(
        message="Launching run in ECS task",
        pipeline_run=run,
        engine_event_data=EngineEventData(
            [
                MetadataEntry("ECS Task ARN", value=arn),
                MetadataEntry("ECS Cluster", value=metadata.cluster),
                MetadataEntry("Run ID", value=run.run_id),
            ]
        ),
        cls=self.__class__,
    )
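
# A minimal sketch (hypothetical tag values) of how the same cpu/memory run tags are typed
# differently in the two override scopes above: ECS expects integers inside
# containerOverrides but strings at the task level, per the comments in launch_run.
cpu_and_memory_overrides = {"cpu": "1024", "memory": "2048"}
container_level = {k: int(v) for k, v in cpu_and_memory_overrides.items()}  # {"cpu": 1024, "memory": 2048}
task_level = dict(cpu_and_memory_overrides)  # {"cpu": "1024", "memory": "2048"}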

def test_raise_on_error(kubeconfig_file):
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])

            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            celery_k8s_run_launcher.register_instance(instance)
            pipeline_name = "demo_pipeline"
            run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            # Check that the run worker command (with set_exit_code_on_failure) was
            # passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=True,
                ).get_command_args()
            )

def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])

            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            celery_k8s_run_launcher.register_instance(instance)
            pipeline_name = "demo_pipeline"
            run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                tags=tags,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake-image-name"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]
            job_resources = container.resources
            assert job_resources == expected_resources

            labels = kwargs["body"].spec.template.metadata.labels
            assert labels["foo_label_key"] == "bar_label_value"

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=None,
                ).get_command_args()
            )

def test_launcher_with_container_context(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        env_vars=["FOO_TEST"],
    )

    container_context_config = {"k8s": {"env_vars": ["BAR_TEST"]}}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    python_origin = recon_pipeline.get_python_origin()
    python_origin = python_origin._replace(
        repository_origin=python_origin.repository_origin._replace(
            container_context=container_context_config,
        )
    )
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])

            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=python_origin,
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            env_names = [env.name for env in container.env]
            assert "BAR_TEST" in env_names
            assert "FOO_TEST" in env_names

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=None,
                ).get_command_args()
            )
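
# A minimal sketch of the env-var merge the assertions above verify: env_vars configured on
# the launcher ("FOO_TEST") and env_vars from the code location's container context
# ("BAR_TEST") both end up on the launched job's container. Variable names here are
# illustrative only.
launcher_env_vars = ["FOO_TEST"]
container_context_env_vars = ["BAR_TEST"]
expected_env_names = set(launcher_env_vars) | set(container_context_env_vars)  # {"FOO_TEST", "BAR_TEST"}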