Example #1
    def config_type(cls):
        '''Include all arguments required for DagsterK8sJobConfig along with additional arguments
        needed for the RunLauncher itself.
        '''
        job_cfg = DagsterK8sJobConfig.config_type()

        run_launcher_extra_cfg = {
            'job_namespace': str,
            'load_incluster_config': Field(bool, is_required=False, default_value=True),
            'kubeconfig_file': Field(Noneable(str), is_required=False, default_value=None),
        }
        return merge_dicts(job_cfg, run_launcher_extra_cfg)
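Note: all of these snippets lean on Dagster's merge_dicts utility, which merges its arguments left to right so that later dictionaries win on key collisions. A minimal sketch of that behavior (the dagster.utils import path is an assumption based on the vintage of these snippets):

from dagster.utils import merge_dicts  # assumed import path

# Later dictionaries take precedence on key collisions; other keys pass through.
base = {"job_namespace": "default", "load_incluster_config": True}
extra = {"job_namespace": "production"}

assert merge_dicts(base, extra) == {
    "job_namespace": "production",
    "load_incluster_config": True,
}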
Example #2
def test_backfill_partition_range(backfill_args_context):
    with backfill_args_context as (cli_args, instance):
        args = merge_dicts(cli_args, {
            "pipeline": "baz",
            "partition_set": "baz_partitions",
            "from": "7"
        })
        run_test_backfill(args, instance, expected_count=3)

        args = merge_dicts(cli_args, {
            "pipeline": "baz",
            "partition_set": "baz_partitions",
            "to": "2"
        })
        run_test_backfill(args, instance, expected_count=6)  # 3 more runs

        args = merge_dicts(
            cli_args, {
                "pipeline": "baz",
                "partition_set": "baz_partitions",
                "from": "2",
                "to": "5"
            })
        run_test_backfill(args, instance, expected_count=10)  # 4 more runs
Example #3
def _create_sensor_run(instance, repo_location, external_sensor,
                       external_pipeline, run_request):
    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        external_sensor.mode,
        step_keys_to_execute=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    return instance.create_run(
        pipeline_name=external_sensor.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=external_sensor.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=external_sensor.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )
Example #4
    def config_type(cls):
        """Include all arguments required for DagsterK8sJobConfig along with additional arguments
        needed for the RunLauncher itself.
        """
        job_cfg = DagsterK8sJobConfig.config_type()

        run_launcher_extra_cfg = {
            "job_namespace":
            Field(StringSource, is_required=False, default_value="default"),
            "load_incluster_config":
            Field(bool, is_required=False, default_value=True),
            "kubeconfig_file":
            Field(Noneable(str), is_required=False, default_value=None),
        }
        return merge_dicts(job_cfg, run_launcher_extra_cfg)
Example #5
    def merge(self, other: "DockerContainerContext"):
        # Combines config set at a higher level with overrides/additions that are set at a
        # lower level. For example, config set in the `DockerRunLauncher` can be combined
        # with config set at the step level in the `docker_executor`.
        # Lists of env vars and networks are appended, the registry is replaced, and the
        # `container_kwargs` field does a shallow merge so that different kwargs can be
        # combined or replaced without replacing the full set of arguments.
        return DockerContainerContext(
            registry=other.registry if other.registry is not None else self.registry,
            env_vars=self.env_vars + other.env_vars,
            networks=self.networks + other.networks,
            container_kwargs=merge_dicts(self.container_kwargs, other.container_kwargs),
        )
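To make the merge semantics above concrete, here is a small hypothetical illustration (field values invented; assumes DockerContainerContext defaults unset fields to empty lists/dicts):

launcher_ctx = DockerContainerContext(
    registry={"url": "my-registry.io"},  # hypothetical registry value
    env_vars=["DATABASE_URL"],
    networks=["backend"],
    container_kwargs={"auto_remove": True},
)
step_ctx = DockerContainerContext(
    env_vars=["STEP_TOKEN"],
    container_kwargs={"mem_limit": "2g"},
)

merged = launcher_ctx.merge(step_ctx)
# merged.env_vars == ["DATABASE_URL", "STEP_TOKEN"]       (lists appended)
# merged.registry == {"url": "my-registry.io"}            (kept: other.registry is None)
# merged.container_kwargs == {"auto_remove": True, "mem_limit": "2g"}  (shallow merge)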
Example #6
def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    _test_termination(dagster_instance, run_config)
Example #7
    def __new__(
        cls,
        pipeline_name,
        key_suffix,
        step_inputs,
        step_outputs,
        compute_fn,
        kind,
        solid_handle,
        solid,
        logging_tags=None,
    ):
        check.inst_param(solid, "solid", Solid)

        return super(ExecutionStep, cls).__new__(
            cls,
            pipeline_name=check.str_param(pipeline_name, "pipeline_name"),
            key_suffix=check.str_param(key_suffix, "key_suffix"),
            step_inputs=check.list_param(step_inputs, "step_inputs", of_type=StepInput),
            step_input_dict={si.name: si for si in step_inputs},
            step_outputs=check.list_param(step_outputs, "step_outputs", of_type=StepOutput),
            step_output_dict={so.name: so for so in step_outputs},
            # compute_fn is the compute function for the step, not to be confused with the
            # compute_fn of the passed-in solid.
            compute_fn=check.callable_param(compute_fn, "compute_fn"),
            kind=check.inst_param(kind, "kind", StepKind),
            solid_handle=check.inst_param(solid_handle, "solid_handle", SolidHandle),
            solid_version=solid.definition.version,
            logging_tags=merge_dicts(
                {
                    "step_key": str(solid_handle) + "." + key_suffix,
                    "pipeline": pipeline_name,
                    "solid": solid_handle.name,
                    "solid_definition": solid.definition.name,
                },
                check.opt_dict_param(logging_tags, "logging_tags"),
            ),
            tags=solid.tags,
            hook_defs=solid.hook_defs,
        )
Example #8
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    environment_dict = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]
        ),
        {
            'execution': {
                'celery-k8s': {
                    'config': {
                        'broker': {'env': 'DAGSTER_K8S_CELERY_BROKER'},
                        'backend': {'env': 'DAGSTER_K8S_CELERY_BACKEND'},
                        'job_image': dagster_docker_image,
                        'job_namespace': helm_namespace,
                        'instance_config_map': 'dagster-instance',
                        'postgres_password_secret': 'dagster-postgresql-secret',
                        'image_pull_policy': 'Always',
                        'env_config_maps': ['dagster-pipeline-env'],
                    }
                }
            },
        },
    )

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['startPipelineExecutionForCreatedRun']['__typename']
        == 'StartPipelineRunSuccess'
    )
Example #9
def test_k8s_run_launcher_image_from_origin(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    # Like the previous test, but the executor doesn't supply an image - it's pulled
    # from the origin on the run instead
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [
        p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml")),
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    pipeline_name = "demo_k8s_executor_pipeline"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher, run_config=run_config, pipeline_name=pipeline_name
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_k8s_run_launcher
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()
Example #10
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        repository_location_handle = external_pipeline.repository_handle.repository_location_handle

        check.inst(
            repository_location_handle,
            GRPC_REPOSITORY_LOCATION_HANDLE_TYPES,
            "DefaultRunLauncher: Can't launch runs for pipeline not loaded from a GRPC server",
        )

        self._instance.add_run_tags(
            run.run_id,
            {
                GRPC_INFO_TAG: seven.json.dumps(
                    merge_dicts(
                        {"host": repository_location_handle.host},
                        (
                            {"port": repository_location_handle.port}
                            if repository_location_handle.port
                            else {"socket": repository_location_handle.socket}
                        ),
                        ({"use_ssl": True} if repository_location_handle.use_ssl else {}),
                    )
                )
            },
        )

        res = repository_location_handle.client.start_run(
            ExecuteExternalPipelineArgs(
                pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=self._instance.get_ref(),
            ))

        if not res.success:
            raise DagsterLaunchFailedError(
                res.message, serializable_error_info=res.serializable_error_info
            )

        self._run_id_to_repository_location_handle_cache[run.run_id] = repository_location_handle

        return run
Example #11
def get_failing_celery_job_engine_config(dagster_docker_image, job_namespace):
    return {
        "execution": {
            "config":
            merge_dicts(
                ({
                    "job_image": dagster_docker_image,
                } if dagster_docker_image else {}),
                {
                    "job_namespace": job_namespace,
                    "image_pull_policy": image_pull_policy(),
                    "env_config_maps": ["non-existent-config-map"],
                },
            )
        },
    }
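The ({...} if condition else {}) idiom in the snippet above (and in the launch_run example earlier) is a convenient way to make a key optional: merging an empty dict is a no-op, so the key only appears in the result when the condition holds. A standalone sketch (engine_config is a made-up helper for illustration):

def engine_config(job_image=None):
    # The job_image key is only present when an image was supplied.
    return merge_dicts(
        ({"job_image": job_image} if job_image else {}),
        {"image_pull_policy": "Always"},
    )

assert engine_config() == {"image_pull_policy": "Always"}
assert engine_config("my/image:dev") == {
    "job_image": "my/image:dev",
    "image_pull_policy": "Always",
}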
Example #12
def k8s_mode_defs(resources=None, name="default"):
    from dagster_k8s.executor import k8s_job_executor

    resources = resources if resources else {"s3": s3_resource}
    resources = merge_dicts(resources, {"io_manager": s3_pickle_io_manager})

    return [
        ModeDefinition(
            name=name,
            resource_defs=resources,
            executor_defs=default_executors + [k8s_job_executor],
        )
    ]
Example #13
def test_map_fail(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config=merge_dicts(
                {"solids": {"emit": {"config": {"fail": True}}}}, run_config
            ),
            raise_on_error=False,
        )
        assert not result.success
Example #14
def _default_cli_test_instance_tempdir(temp_dir, overrides=None):
    default_overrides = {
        "run_launcher": {
            "module": "dagster.core.test_utils",
            "class": "MockedRunLauncher",
        }
    }
    with instance_for_test(
        temp_dir=temp_dir,
        overrides=merge_dicts(default_overrides, overrides if overrides else {}),
    ) as instance:
        with mock.patch("dagster.core.instance.DagsterInstance.get") as _instance:
            _instance.return_value = instance
            yield instance
Example #15
def scheduler_instance(overrides=None):
    with tempfile.TemporaryDirectory() as temp_dir:
        with _default_cli_test_instance_tempdir(
            temp_dir,
            overrides=merge_dicts(
                {
                    "scheduler": {
                        "module": "dagster.utils.test",
                        "class": "FilesystemTestScheduler",
                        "config": {"base_dir": temp_dir},
                    }
                },
                overrides if overrides else {},
            ),
        ) as instance:
            yield instance
Example #16
def test_map_empty(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config=merge_dicts(
                {"solids": {"num_range": {"config": {"range": 0}}}}, run_config
            ),
        )
        assert result.success
        assert result.result_for_solid("double_total").output_value() == 0
Example #17
def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
        ),
    )

    _test_termination(dagster_instance, run_config)
Example #18
def _get_existing_run_for_request(instance, external_schedule, schedule_time, run_request):
    tags = merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {SCHEDULED_EXECUTION_TIME_TAG: schedule_time.in_tz("UTC").isoformat()},
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key
    runs_filter = PipelineRunsFilter(tags=tags)
    existing_runs = instance.get_runs(runs_filter)
    if not existing_runs:
        return None
    return existing_runs[0]
Example #19
def celery_docker_config():
    additional_config = {
        'docker': Field(
            {
                'image': Field(
                    StringSource,
                    is_required=True,
                    description='The docker image to be used for step execution.',
                ),
                'registry': Field(
                    {
                        'url': Field(StringSource),
                        'username': Field(StringSource),
                        'password': Field(StringSource),
                    },
                    is_required=False,
                    description='Information for using a non local/public docker registry',
                ),
                'env_vars': Field(
                    [str],
                    is_required=False,
                    description='The list of environment variable names to forward from the '
                    'celery worker in to the docker container',
                ),
            },
            is_required=True,
            description='The configuration for interacting with docker in the celery worker.',
        ),
        'repo_location_name': Field(
            StringSource,
            is_required=False,
            default_value=IN_PROCESS_NAME,
            description='[temporary workaround] The repository location name to use for execution.',
        ),
    }

    cfg = merge_dicts(CELERY_CONFIG, additional_config)
    return cfg
Example #20
def _create_scheduler_run(
    instance,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    run_config = run_request.run_config
    schedule_tags = run_request.tags

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        external_schedule.mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    tags[SCHEDULED_EXECUTION_TIME_TAG] = to_timezone(schedule_time, "UTC").isoformat()
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    return instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example #21
def start_scheduled_execution(graphene_info, schedule_name):
    from dagster_graphql.schema.roots import create_execution_metadata

    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(schedule_name, 'schedule_name')

    schedule = get_dagster_schedule(graphene_info, schedule_name)
    schedule_def = get_dagster_schedule_def(graphene_info, schedule_name)

    schedule_context = ScheduleExecutionContext(graphene_info.context.instance)

    # Run should_execute and halt if it returns False
    if not schedule_def.should_execute(schedule_context):
        return graphene_info.schema.type_named('ScheduledExecutionBlocked')(
            message='Schedule {schedule_name} did not run because the should_execute did not return'
            ' True'.format(schedule_name=schedule_name)
        )

    # Get environment_dict
    environment_dict = schedule_def.get_environment_dict(schedule_context)
    tags = schedule_def.get_tags(schedule_context)

    check.invariant('dagster/schedule_id' not in tags)
    tags['dagster/schedule_id'] = schedule.schedule_id

    check.invariant('dagster/schedule_name' not in tags)
    tags['dagster/schedule_name'] = schedule_def.name

    execution_metadata_tags = [{'key': key, 'value': value} for key, value in tags.items()]
    execution_params = merge_dicts(
        schedule_def.execution_params, {'executionMetadata': {'tags': execution_metadata_tags}}
    )

    selector = ExecutionSelector(
        execution_params['selector']['name'], execution_params['selector'].get('solidSubset')
    )

    execution_params = ExecutionParams(
        selector=selector,
        environment_dict=environment_dict,
        mode=execution_params.get('mode'),
        execution_metadata=create_execution_metadata(execution_params.get('executionMetadata')),
        step_keys=execution_params.get('stepKeys'),
        previous_run_id=None,
    )

    return start_pipeline_execution(graphene_info, execution_params)
Example #22
def test_success_whole_execution_plan_with_in_memory_config(graphql_context, snapshot):
    instance = graphql_context.instance
    environment_dict = merge_dicts(
        csv_hello_world_solids_config(), {'storage': {'in_memory': {}}}
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world, environment_dict=environment_dict)
    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData': environment_dict,
                'stepKeys': None,
                'executionMetadata': {
                    'runId': pipeline_run.run_id
                },
                'mode': 'default',
            }
        },
    )

    query_result = result.data['executePlan']

    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'csv_hello_world'
    assert query_result['hasFailures'] is False
    step_events = {
        step_event['step']['key']: step_event
        for step_event in query_result['stepEvents'] if step_event['step']
    }
    assert 'sum_solid.compute' in step_events
    assert 'sum_sq_solid.compute' in step_events

    snapshot.assert_match(clean_log_messages(result.data))
    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)
    assert not intermediates_manager.has_intermediate(
        None, StepOutputHandle('sum_solid.compute'))
    assert not intermediates_manager.has_intermediate(
        None, StepOutputHandle('sum_sq_solid.compute'))
Example #23
    def frequent_celery():
        from dagster_celery_k8s.config import get_celery_engine_config

        additional_env_config_maps = ["test-aws-env-configmap"] if not IS_BUILDKITE else []

        return merge_dicts(
            merge_yamls(
                [
                    file_relative_path(__file__, os.path.join("..", "environments", "env.yaml")),
                    file_relative_path(__file__, os.path.join("..", "environments", "env_s3.yaml")),
                ]
            ),
            get_celery_engine_config(
                image_pull_policy=image_pull_policy(),
                additional_env_config_maps=additional_env_config_maps,
            ),
        )
Example #24
def test_map_selection(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config=merge_dicts(
                {"solids": {"emit": {"inputs": {"num": 2}}}}, run_config
            ),
            solid_selection=["emit*", "emit_ten"],
        )
        assert result.success
        assert result.result_for_solid("double_total").output_value() == 40
Example #25
    def __new__(
        cls,
        pipeline_name,
        key_suffix,
        step_inputs,
        step_outputs,
        compute_fn,
        kind,
        solid_handle,
        solid,
        logging_tags=None,
        tags=None,
        hook_defs=None,
    ):
        return super(ExecutionStep, cls).__new__(
            cls,
            pipeline_name=check.str_param(pipeline_name, 'pipeline_name'),
            key_suffix=check.str_param(key_suffix, 'key_suffix'),
            step_inputs=check.list_param(step_inputs, 'step_inputs', of_type=StepInput),
            step_input_dict={si.name: si for si in step_inputs},
            step_outputs=check.list_param(step_outputs, 'step_outputs', of_type=StepOutput),
            step_output_dict={so.name: so for so in step_outputs},
            compute_fn=check.callable_param(compute_fn, 'compute_fn'),
            kind=check.inst_param(kind, 'kind', StepKind),
            solid_handle=check.inst_param(solid_handle, 'solid_handle', SolidHandle),
            logging_tags=merge_dicts(
                {
                    'step_key': str(solid_handle) + '.' + key_suffix,
                    'pipeline': pipeline_name,
                    'solid': solid_handle.name,
                    'solid_definition': solid.definition.name,
                },
                check.opt_dict_param(logging_tags, 'logging_tags'),
            ),
            tags=check.opt_inst_param(tags, 'tags', frozentags),
            hook_defs=check.opt_set_param(hook_defs, 'hook_defs', of_type=HookDefinition),
        )
Example #26
def dagster_instance_config(
    base_dir, config_filename=DAGSTER_CONFIG_YAML_FILENAME, overrides=None
):
    overrides = check.opt_dict_param(overrides, 'overrides')
    dagster_config_dict = merge_dicts(
        load_yaml_from_globs(os.path.join(base_dir, config_filename)), overrides
    )
    dagster_config_type = resolve_to_config_type(define_dagster_config_cls())
    dagster_config = validate_config(dagster_config_type, dagster_config_dict)
    if not dagster_config.success:
        raise DagsterInvalidConfigError(
            'Errors whilst loading dagster instance config at {}.'.format(config_filename),
            dagster_config.errors,
            dagster_config_dict,
        )
    return dagster_config.value
Example #27
def celery_docker_config():
    additional_config = {
        "docker":
        Field(
            {
                "image":
                Field(
                    StringSource,
                    is_required=False,
                    description=
                    "The docker image to be used for step execution.",
                ),
                "registry":
                Field(
                    {
                        "url": Field(StringSource),
                        "username": Field(StringSource),
                        "password": Field(StringSource),
                    },
                    is_required=False,
                    description=
                    "Information for using a non local/public docker registry",
                ),
                "env_vars":
                Field(
                    [str],
                    is_required=False,
                    description=
                    "The list of environment variables names to forward from the celery worker in to the docker container",
                ),
                "network":
                Field(
                    str,
                    is_required=False,
                    description=
                    "Name of the network this container will be connected to at creation time",
                ),
            },
            is_required=True,
            description=
            "The configuration for interacting with docker in the celery worker.",
        ),
    }

    cfg = merge_dicts(CELERY_CONFIG, additional_config)
    return cfg
Example #28
def default_instance_tempdir(temp_dir, overrides=None):
    default_overrides = {
        "run_launcher": {
            "module":
            "dagster_tests.cli_tests.command_tests.test_cli_commands",
            "class": "InMemoryRunLauncher",
        }
    }
    with instance_for_test_tempdir(
        temp_dir, overrides=merge_dicts(default_overrides, overrides if overrides else {})
    ) as instance:
        with mock.patch("dagster.core.instance.DagsterInstance.get") as _instance:
            _instance.return_value = instance
            yield instance
Example #29
def test_success_whole_execution_plan_with_in_memory_config(snapshot):
    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    instance.create_empty_run(run_id, 'csv_hello_world')
    result = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData': merge_dicts(
                    csv_hello_world_solids_config(), {'storage': {'in_memory': {}}}
                ),
                'stepKeys': None,
                'executionMetadata': {'runId': run_id},
                'mode': 'default',
            }
        },
    )

    query_result = result.data['executePlan']

    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'csv_hello_world'
    assert query_result['hasFailures'] is False
    step_events = {
        step_event['step']['key']: step_event
        for step_event in query_result['stepEvents'] if step_event['step']
    }
    assert 'sum_solid.compute' in step_events
    assert 'sum_sq_solid.compute' in step_events

    snapshot.assert_match(clean_log_messages(result.data))
    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        run_id)
    assert not store.has_intermediate(None, 'sum_solid.compute')
    assert not store.has_intermediate(None, 'sum_sq_solid.compute')
Example #30
def test_k8s_run_launcher_with_celery_executor_fails(
    dagster_docker_image,
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher, run_config=run_config, pipeline_name=pipeline_name
    )

    timeout = datetime.timedelta(0, 120)

    start_time = datetime.datetime.now()

    while True:
        assert (
            datetime.datetime.now() < start_time + timeout
        ), "Timed out waiting for pipeline failure"
        event_records = dagster_instance_for_k8s_run_launcher.all_logs(run_id)

        found_pipeline_failure = False
        for event_record in event_records:
            if event_record.dagster_event:
                if event_record.dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
                    found_pipeline_failure = True

        if found_pipeline_failure:
            break

        time.sleep(5)

    assert (
        dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id).status
        == PipelineRunStatus.FAILURE
    )