Example #1
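A GRPC-based run launcher. It verifies the pipeline was loaded from a GRPC server, tags the run with the server's host and port (or socket), and asks the GRPC client to start the run via ExecuteRunArgs, raising DagsterLaunchFailedError if the server reports a failure.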
    def launch_run(self, instance, run, external_pipeline):
        """Subclasses must implement this method."""

        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        repository_location_handle = external_pipeline.repository_handle.repository_location_handle

        check.inst(
            repository_location_handle,
            GRPC_REPOSITORY_LOCATION_HANDLE_TYPES,
            "GrpcRunLauncher: Can't launch runs for pipeline not loaded from a GRPC server",
        )

        self._instance.add_run_tags(
            run.run_id,
            {
                GRPC_INFO_TAG: seven.json.dumps(
                    merge_dicts(
                        {"host": repository_location_handle.host},
                        {"port": repository_location_handle.port}
                        if repository_location_handle.port
                        else {"socket": repository_location_handle.socket},
                    )
                )
            },
        )

        res = repository_location_handle.client.start_run(
            ExecuteRunArgs(
                pipeline_origin=external_pipeline.get_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=self._instance.get_ref(),
            )
        )

        if not res.success:
            raise DagsterLaunchFailedError(
                res.message, serializable_error_info=res.serializable_error_info
            )

        self._run_id_to_repository_location_handle_cache[run.run_id] = repository_location_handle

        return run
Example #2
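A Docker run launcher. It resolves the image to use (from the repository origin, falling back to the launcher config), validates the image name, serializes ExecuteRunArgs into a `dagster api execute_run_with_structured_logs` command, and runs that command in a new detached container, pulling the image first if it is missing locally.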
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        docker_image = external_pipeline.get_python_origin().repository_origin.container_image

        if not docker_image:
            docker_image = self._image

        if not docker_image:
            raise Exception(
                "No docker image specified by the instance config or repository"
            )

        try:
            # validate that the docker image name is valid
            reference.Reference.parse(docker_image)
        except Exception as e:
            raise Exception(
                "Docker image name {docker_image} is not correctly formatted".format(
                    docker_image=docker_image
                )
            ) from e

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=external_pipeline.get_python_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            ))

        command = "dagster api execute_run_with_structured_logs {}".format(
            json.dumps(input_json))

        docker_env = (
            {env_name: os.getenv(env_name) for env_name in self._env_vars}
            if self._env_vars
            else {}
        )

        client = self._get_client()

        try:
            container = client.containers.create(
                image=docker_image,
                command=command,
                detach=True,
                environment=docker_env,
                network=self._network,
            )

        except docker.errors.ImageNotFound:
            client.images.pull(docker_image)
            container = client.containers.create(
                image=docker_image,
                command=command,
                detach=True,
                environment=docker_env,
                network=self._network,
            )

        self._instance.report_engine_event(
            message="Launching run in a new container {container_id} with image {docker_image}".format(
                container_id=container.id, docker_image=docker_image
            ),
            pipeline_run=run,
            cls=self.__class__,
        )

        self._instance.add_run_tags(
            run.run_id,
            {
                DOCKER_CONTAINER_ID_TAG: container.id,
                DOCKER_IMAGE_TAG: docker_image
            },
        )

        container.start()

        return run
Example #3
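An ECS run launcher targeting Fargate. It builds the run command from ExecuteRunArgs.get_command_args(), applies optional cpu/memory overrides taken from run tags, launches the task with the current task's network configuration, and records the resulting task ARN in run tags and an engine event.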
    def launch_run(self, context: LaunchRunContext) -> None:
        """
        Launch a run in an ECS task.

        Currently, Fargate is the only supported launchType and awsvpc is the
        only supported networkMode. These are the defaults that are set up by
        docker-compose when you use the Dagster ECS reference deployment.
        """
        run = context.pipeline_run
        metadata = self._task_metadata()
        pipeline_origin = context.pipeline_code_origin
        image = pipeline_origin.repository_origin.container_image
        task_definition = self._task_definition(metadata, image)["family"]

        args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )
        command = args.get_command_args()

        # Set cpu or memory overrides
        # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
        overrides = {}
        tags = self._get_run_tags(run.run_id)
        if tags.cpu:
            overrides["cpu"] = tags.cpu
        if tags.memory:
            overrides["memory"] = tags.memory

        # Run a task using the same network configuration as this process's
        # task.
        response = self.ecs.run_task(
            taskDefinition=task_definition,
            cluster=metadata.cluster,
            overrides={
                "containerOverrides": [{
                    "name": self.container_name,
                    "command": command
                }],
                **overrides,
            },
            networkConfiguration={
                "awsvpcConfiguration": {
                    "subnets": metadata.subnets,
                    "assignPublicIp": metadata.assign_public_ip,
                    "securityGroups": metadata.security_groups,
                }
            },
            launchType="FARGATE",
        )

        arn = response["tasks"][0]["taskArn"]
        self._set_run_tags(run.run_id, task_arn=arn)
        self._set_ecs_tags(run.run_id, task_arn=arn)
        self._instance.report_engine_event(
            message="Launching run in ECS task",
            pipeline_run=run,
            engine_event_data=EngineEventData([
                EventMetadataEntry.text(arn, "ECS Task ARN"),
                EventMetadataEntry.text(metadata.cluster, "ECS Cluster"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
Example #4
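A test of run cancellation over GRPC. It starts a streaming pipeline through an ephemeral GRPC server using ExecuteRunArgs, cancels the execution mid-run, then asserts that fewer than the configured 20 materializations were logged and that a step failure was recorded (soft termination).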
def test_cancel_run():
    with temp_instance() as instance:

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )

        with GrpcServerProcess(
            loadable_target_origin, max_workers=10
        ).create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={'solids': {'streamer': {'config': {'length': 20}}}},
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name='streaming_pipeline',
                    repository_origin=RepositoryGrpcServerOrigin(
                        host='localhost',
                        socket=api_client.socket,
                        port=api_client.port,
                        repository_name='test_repository',
                    ),
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args])
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()
            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(
                    run_id=pipeline_run.run_id))
            assert res.success is True

            poll_for_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            assert (len([
                ev for ev in logs
                if ev.dagster_event.event_type_value == 'STEP_MATERIALIZATION'
            ]) < 20)

            # soft termination
            assert [
                ev for ev in logs
                if ev.dagster_event.event_type_value == 'STEP_FAILURE'
            ]
Example #5
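A later revision of the ECS launcher from Example #3. It additionally sanitizes the task-definition family name, resolves an EcsContainerContext for the run, casts cpu/memory overrides to the integer and string types that containerOverrides and taskOverrides respectively expect, and raises if run_task returns failures instead of tasks.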
    def launch_run(self, context: LaunchRunContext) -> None:
        """
        Launch a run in an ECS task.

        Currently, Fargate is the only supported launchType and awsvpc is the
        only supported networkMode. These are the defaults that are set up by
        docker-compose when you use the Dagster ECS reference deployment.
        """
        run = context.pipeline_run
        family = sanitize_family(
            run.external_pipeline_origin.external_repository_origin.repository_location_origin.location_name
        )

        container_context = EcsContainerContext.create_for_run(run, self)

        metadata = self._task_metadata()
        pipeline_origin = context.pipeline_code_origin
        image = pipeline_origin.repository_origin.container_image
        task_definition = self._task_definition(family, metadata, image, container_context)[
            "family"
        ]

        args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )
        command = args.get_command_args()

        # Set cpu or memory overrides
        # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
        cpu_and_memory_overrides = {}
        tags = self._get_run_tags(run.run_id)
        if tags.cpu:
            cpu_and_memory_overrides["cpu"] = tags.cpu
        if tags.memory:
            cpu_and_memory_overrides["memory"] = tags.memory

        # Run a task using the same network configuration as this process's
        # task.
        response = self.ecs.run_task(
            taskDefinition=task_definition,
            cluster=metadata.cluster,
            overrides={
                "containerOverrides": [
                    {
                        "name": self.container_name,
                        "command": command,
                        # containerOverrides expects cpu/memory as integers
                        **{k: int(v) for k, v in cpu_and_memory_overrides.items()},
                    }
                ],
                # taskOverrides expects cpu/memory as strings
                **cpu_and_memory_overrides,
            },
            networkConfiguration={
                "awsvpcConfiguration": {
                    "subnets": metadata.subnets,
                    "assignPublicIp": metadata.assign_public_ip,
                    "securityGroups": metadata.security_groups,
                }
            },
            launchType="FARGATE",
        )

        tasks = response["tasks"]

        if not tasks:
            failures = response["failures"]
            exceptions = []
            for failure in failures:
                arn = failure.get("arn")
                reason = failure.get("reason")
                detail = failure.get("detail")
                exceptions.append(Exception(f"Task {arn} failed because {reason}: {detail}"))
            raise Exception(exceptions)

        arn = tasks[0]["taskArn"]
        self._set_run_tags(run.run_id, task_arn=arn)
        self._set_ecs_tags(run.run_id, task_arn=arn)
        self._instance.report_engine_event(
            message="Launching run in ECS task",
            pipeline_run=run,
            engine_event_data=EngineEventData(
                [
                    MetadataEntry("ECS Task ARN", value=arn),
                    MetadataEntry("ECS Cluster", value=metadata.cluster),
                    MetadataEntry("Run ID", value=run.run_id),
                ]
            ),
            cls=self.__class__,
        )
Example #6
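A CeleryK8sRunLauncher test with a mocked Kubernetes batch API. Because the launcher is constructed with fail_pod_on_run_failure=True, the test asserts that the created job's container args equal ExecuteRunArgs(..., set_exit_code_on_failure=True).get_command_args().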
def test_raise_on_error(kubeconfig_file):
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )
    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(
                workspace.repository_location_names[0])

            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            celery_k8s_run_launcher.register_instance(instance)
            pipeline_name = "demo_pipeline"
            run_config = {
                "execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}
            }
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            args = container.args
            assert args == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=True,
            ).get_command_args()
Example #7
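A CeleryK8sRunLauncher test verifying that user-defined k8s config supplied through the dagster-k8s/config run tag (resource requests and limits) and the launcher's labels are passed through to the created Kubernetes job, alongside the standard ExecuteRunArgs container args.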
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(
                workspace.repository_location_names[0])

            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            celery_k8s_run_launcher.register_instance(instance)
            pipeline_name = "demo_pipeline"
            run_config = {
                "execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}
            }
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                tags=tags,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake-image-name"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            job_resources = container.resources
            assert job_resources == expected_resources

            job_labels = kwargs["body"].spec.template.metadata.labels
            assert job_labels["foo_label_key"] == "bar_label_value"

            args = container.args
            assert args == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=None,
            ).get_command_args()
Example #8
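A K8sRunLauncher test verifying container-context support: env vars from both the launcher config (FOO_TEST) and a per-repository container context (BAR_TEST) must appear on the job container, and the container args must match ExecuteRunArgs.get_command_args().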
def test_launcher_with_container_context(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        env_vars=["FOO_TEST"],
    )

    container_context_config = {"k8s": {"env_vars": ["BAR_TEST"]}}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    python_origin = recon_pipeline.get_python_origin()
    python_origin = python_origin._replace(
        repository_origin=python_origin.repository_origin._replace(
            container_context=container_context_config,
        )
    )
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=python_origin,
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"

        container = kwargs["body"].spec.template.spec.containers[0]

        env_names = [env.name for env in container.env]

        assert "BAR_TEST" in env_names
        assert "FOO_TEST" in env_names

        args = container.args
        assert (
            args
            == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=None,
            ).get_command_args()
        )