Example #1
def test_configured_input_should_skip_step():
    called = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        called["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    run_config = {
        "solids": {
            "solid_should_not_skip": {
                "inputs": {
                    "input_two": {
                        "value": "2"
                    }
                }
            }
        }
    }
    execute_pipeline(my_pipeline, run_config=run_config)
    assert called.get("yup")

    # ensure should_skip_step behaves the same way as execute_pipeline
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="my_pipeline",
                               run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["one"],
            run_config=run_config,
        ),
        InMemoryPipeline(my_pipeline),
        instance,
        pipeline_run,
        run_config=run_config,
    )
    assert not should_skip_step(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["solid_should_not_skip"],
            run_config=run_config,
        ),
        instance,
        pipeline_run.run_id,
    )
Example #2
def test_valid_job_format(run_launcher, docker_image, environments_path):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(
        os.path.join(environments_path, 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)
    job = run_launcher.construct_job(run)

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run_id, job_image=docker_image).strip())
Example #3
def _create_sensor_run(
    instance, repo_location, external_sensor, external_pipeline, run_request, target_data
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        target_data.mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    log_action(
        instance,
        SENSOR_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SENSOR_NAME_HASH": hash_name(external_sensor.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
            "repo_hash": hash_name(repo_location.name),
        },
    )

    return instance.create_run(
        pipeline_name=target_data.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=target_data.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=target_data.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example #4
    def get_tags(self, context):
        check.inst_param(context, 'context', ScheduleExecutionContext)
        if self._tags:
            tags = self._tags
            check_tags(tags, 'tags')
        else:
            tags = self._tags_fn(context)
            # These tags are checked in _tags_fn_wrapper

        tags = merge_dicts(tags, PipelineRun.tags_for_schedule(self))

        return tags
Example #5
def test_queue_from_schedule_and_sensor(instance, foo_example_workspace,
                                        foo_example_repo):
    external_schedule = foo_example_repo.get_external_schedule(
        "always_run_schedule")
    external_sensor = foo_example_repo.get_external_sensor("always_on_sensor")
    external_pipeline = foo_example_repo.get_full_external_pipeline(
        "foo_pipeline")

    instance.start_schedule(external_schedule)
    instance.start_sensor(external_sensor)

    with start_daemon(timeout=180,
                      workspace_file=file_relative_path(__file__, "repo.py")):
        run = create_run(instance, external_pipeline)
        instance.submit_run(run.run_id, foo_example_workspace)

        runs = [
            poll_for_finished_run(instance, run.run_id),
            poll_for_finished_run(
                instance,
                run_tags=PipelineRun.tags_for_sensor(external_sensor)),
            poll_for_finished_run(
                instance,
                run_tags=PipelineRun.tags_for_schedule(external_schedule),
                timeout=90,
            ),
        ]

        for run in runs:
            logs = instance.all_logs(run.run_id)
            assert_events_in_order(
                logs,
                [
                    "PIPELINE_ENQUEUED",
                    "PIPELINE_DEQUEUED",
                    "PIPELINE_STARTING",
                    "PIPELINE_START",
                    "PIPELINE_SUCCESS",
                ],
            )
Example #6
def start_pipeline_execution(graphene_info, execution_params,
                             reexecution_config):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    check.opt_inst_param(reexecution_config, 'reexecution_config',
                         ReexecutionConfig)

    instance = graphene_info.context.instance

    dauphin_pipeline = get_dauphin_pipeline_from_selector(
        graphene_info, execution_params.selector)

    get_validated_config(
        graphene_info,
        dauphin_pipeline,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        dauphin_pipeline.get_dagster_pipeline(),
        execution_params.environment_dict,
        run_config=RunConfig(mode=execution_params.mode),
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params,
                                           execution_plan, reexecution_config)

    run = instance.create_run(
        PipelineRun(
            pipeline_name=dauphin_pipeline.get_dagster_pipeline().name,
            run_id=execution_params.execution_metadata.run_id if
            execution_params.execution_metadata.run_id else make_new_run_id(),
            selector=execution_params.selector,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            reexecution_config=reexecution_config,
            step_keys_to_execute=execution_params.step_keys,
            tags=execution_params.execution_metadata.tags,
            status=PipelineRunStatus.NOT_STARTED,
        ))

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        dauphin_pipeline.get_dagster_pipeline(),
        run,
        raise_on_error=graphene_info.context.raise_on_error,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineExecutionSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run))
Example #7
    def get_execution_data(self, context):
        check.inst_param(context, "context", ScheduleExecutionContext)
        result = list(ensure_gen(self._execution_fn(context)))

        if not result:
            return []

        if len(result) == 1:
            check.is_list(result, of_type=(RunRequest, SkipReason))
            data = result[0]

            if isinstance(data, SkipReason):
                return result
            check.inst(data, RunRequest)
            return [
                RunRequest(
                    run_key=data.run_key,
                    run_config=data.run_config,
                    tags=merge_dicts(data.tags,
                                     PipelineRun.tags_for_schedule(self)),
                )
            ]

        check.is_list(result, of_type=RunRequest)

        check.invariant(
            not any(not data.run_key for data in result),
            "Schedules that return multiple RunRequests must specify a run_key in each RunRequest",
        )

        # clone all the run requests with the required schedule tags
        return [
            RunRequest(
                run_key=data.run_key,
                run_config=data.run_config,
                tags=merge_dicts(data.tags,
                                 PipelineRun.tags_for_schedule(self)),
            ) for data in result
        ]
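The invariant above requires a run_key on every RunRequest whenever a schedule returns more than one. A minimal sketch of an execution function satisfying that constraint (the run_key values and empty run_config are illustrative assumptions, not taken from the original source):

from dagster import RunRequest

def multi_run_execution_fn(_context):
    # Each RunRequest carries its own run_key, so the check.invariant above passes.
    return [
        RunRequest(run_key="partition_a", run_config={}),
        RunRequest(run_key="partition_b", run_config={}),
    ]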
Example #8
def _fetch_existing_runs(instance, external_sensor, run_requests):
    run_keys = [
        run_request.run_key for run_request in run_requests
        if run_request.run_key
    ]

    if not run_keys:
        return {}

    existing_runs = {}

    if instance.supports_bucket_queries:
        runs = instance.get_runs(
            filters=RunsFilter(
                tags=PipelineRun.tags_for_sensor(external_sensor), ),
            bucket_by=TagBucket(
                tag_key=RUN_KEY_TAG,
                bucket_limit=1,
                tag_values=run_keys,
            ),
        )
        for run in runs:
            tags = run.tags or {}
            run_key = tags.get(RUN_KEY_TAG)
            existing_runs[run_key] = run
        return existing_runs

    else:
        for run_key in run_keys:
            runs = instance.get_runs(
                filters=RunsFilter(tags=merge_dicts(
                    PipelineRun.tags_for_sensor(external_sensor),
                    {RUN_KEY_TAG: run_key},
                )),
                limit=1,
            )
            if runs:
                existing_runs[run_key] = runs[0]
    return existing_runs
Example #9
    def test_single_write_with_missing_snapshot(self, storage):

        run_with_snapshot_id = "lkasjdflkjasdf"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        run_with_missing_snapshot = PipelineRun(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id="nope",
        )

        with pytest.raises(DagsterSnapshotDoesNotExist):
            storage.add_run(run_with_missing_snapshot)
Example #10
def _create_run(instance, pipeline_def, run_config, environment_dict):
    return instance.create_run(
        PipelineRun(
            pipeline_name=pipeline_def.name,
            run_id=run_config.run_id,
            environment_dict=environment_dict,
            mode=run_config.mode,
            selector=pipeline_def.selector,
            step_keys_to_execute=run_config.step_keys_to_execute,
            tags=run_config.tags,
            status=PipelineRunStatus.NOT_STARTED,
            previous_run_id=run_config.previous_run_id,
        ))
Example #11
def yield_empty_pipeline_context(run_id=None, instance=None):
    pipeline = PipelineDefinition([])
    with scoped_pipeline_context(
            pipeline,
            {},
            PipelineRun.create_empty_run(
                'empty',
                run_id=run_id if run_id is not None else 'TESTING',
            ),
            instance or DagsterInstance.ephemeral(),
            create_execution_plan(pipeline),
    ) as context:
        yield context
Example #12
def execute_partition_set(partition_set, partition_filter, instance=None):
    '''Programmatically perform a backfill over a partition set

    Arguments:
        partition_set (PartitionSet): The base partition set to run the backfill over
        partition_filter (Callable[[List[Partition]], List[Partition]]): A function that takes
            a list of partitions and returns a filtered list of partitions to run the backfill
            over.
        instance (DagsterInstance): The instance to use to perform the backfill
    '''
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(
                partition),
            mode='default',
            tags=merge_dicts(
                PipelineRun.tags_for_backfill_id(make_new_backfill_id()),
                partition_set.tags_for_partition(partition),
            ),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

        instance.launch_run(run)
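The docstring above spells out the expected arguments; a minimal usage sketch follows, assuming a PartitionSetDefinition named my_partition_set already exists (the name and the filter function are illustrative, not part of the original example):

from dagster import DagsterInstance

def take_last_two(partitions):
    # partition_filter: receives every candidate partition and returns the subset to backfill
    return partitions[-2:]

execute_partition_set(
    my_partition_set,  # hypothetical PartitionSetDefinition
    take_last_two,
    instance=DagsterInstance.ephemeral(),
)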
Example #13
def test_k8s_run_launcher(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(os.path.join(environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    assert result['data']['startPipelineExecution']['__typename'] == 'StartPipelineExecutionSuccess'
Example #14
    def _has_run(self, run_id):
        global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
        # pylint: disable=protected-access
        if not self._run_storage.has_run(run_id) and not MOCK_HAS_RUN_CALLED:
            self._run_storage.add_run(
                PipelineRun.create_empty_run('foo_pipeline', run_id))
            MOCK_HAS_RUN_CALLED = True
            return False
        elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
            MOCK_HAS_RUN_CALLED = False
            return True
        else:
            return False
Example #15
    def test_wipe_tags(self, storage):
        run_id = 'some_run_id'
        run = PipelineRun(run_id=run_id,
                          pipeline_name='a_pipeline',
                          tags={'foo': 'bar'})

        storage.add_run(run)

        assert storage.get_run_by_id(run_id) == run
        assert dict(storage.get_run_tags()) == {'foo': {'bar'}}

        storage.wipe()
        assert list(storage.get_runs()) == []
        assert dict(storage.get_run_tags()) == {}
Example #16
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    old_run_id = make_new_run_id()
    environment_dict = {
        'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }
    }
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )

    assert result.success

    ## re-execute add_two

    new_run_id = make_new_run_id()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=new_run_id, previous_run_id=result.run_id),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(['add_two.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
Example #17
def _get_existing_run_for_request(instance, external_schedule, schedule_time, run_request):
    tags = merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {
            SCHEDULED_EXECUTION_TIME_TAG: schedule_time.in_tz("UTC").isoformat(),
        },
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key
    runs_filter = PipelineRunsFilter(tags=tags)
    existing_runs = instance.get_runs(runs_filter)
    if not len(existing_runs):
        return None
    return existing_runs[0]
Example #18
def build_run(
    run_id, pipeline_name, mode='default', tags=None, status=PipelineRunStatus.NOT_STARTED
):
    return PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=None,
        mode=mode,
        selector=ExecutionSelector(pipeline_name),
        reexecution_config=None,
        step_keys_to_execute=None,
        tags=tags,
        status=status,
    )
Example #19
def build_run(run_id,
              pipeline_name,
              mode="default",
              tags=None,
              status=PipelineRunStatus.NOT_STARTED):
    return PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=None,
        mode=mode,
        step_keys_to_execute=None,
        tags=tags,
        status=status,
    )
Example #20
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            DagsterInstance.ephemeral(),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
        )
Example #21
    def test_wipe_tags(self, storage):
        run_id = "some_run_id"
        run = PipelineRun(run_id=run_id,
                          pipeline_name="a_pipeline",
                          tags={"foo": "bar"})

        storage.add_run(run)

        assert storage.get_run_by_id(run_id) == run
        assert dict(storage.get_run_tags()) == {"foo": {"bar"}}

        storage.wipe()
        assert list(storage.get_runs()) == []
        assert dict(storage.get_run_tags()) == {}
Example #22
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'passing_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       passing_pipeline,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS
    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
Example #23
def test_multiple_runs_for_successful_runs():
    def get_should_launch_run():
        return PipelineRun(
            run_id=str(uuid.uuid4()),
            status=PipelineRunStatus.SUCCESS,
            mode="prod",
            pipeline_name="download_pipeline",
            run_config={"resources": DEFAULT_PARTITION_RESOURCE_CONFIG},
        )

    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        for run in [
                get_should_launch_run(),
                PipelineRun(status=PipelineRunStatus.FAILURE,
                            mode="prod",
                            pipeline_name="download_pipeline"),
                PipelineRun(status=PipelineRunStatus.SUCCESS,
                            mode="dev",
                            pipeline_name="weird_pipeline"),
                PipelineRun(status=PipelineRunStatus.SUCCESS,
                            mode="test",
                            pipeline_name="download_pipeline"),
                PipelineRun(status=PipelineRunStatus.SUCCESS,
                            mode="prod",
                            pipeline_name="other"),
                get_should_launch_run(),
                get_should_launch_run(),
                get_should_launch_run(),
        ]:
            instance.add_run(run)
        run_requests = list(
            dbt_on_hn_download_finished(
                build_sensor_context(instance=instance)))
        assert len(run_requests) == 4
        for run_request in run_requests:
            assert validate_run_config(dbt_pipeline, run_request.run_config)
Example #24
def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            "requests": {
                "cpu": "250m",
                "memory": "64Mi"
            },
            "limits": {
                "cpu": "500m",
                "memory": "2560Mi"
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip())
Example #25
def in_pipeline_manager(
    pipeline_name='hello_world_pipeline',
    solid_handle=SolidHandle('hello_world', 'hello_world', None),
    handle_kwargs=None,
    mode=None,
    **kwargs
):
    manager = Manager()

    run_id = str(uuid.uuid4())
    instance = DagsterInstance.local_temp()
    marshal_dir = tempfile.mkdtemp()

    if not handle_kwargs:
        handle_kwargs = {
            'pipeline_name': pipeline_name,
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_pipeline',
        }

    pipeline_run_dict = pack_value(
        PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            mode=mode or 'default',
            environment_dict=None,
            selector=None,
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'pipeline_run_dict': pipeline_run_dict,
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
                'instance_ref_dict': pack_value(instance.get_ref()),
            }

            manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
Example #26
def test_using_gcs_for_subplan(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'gcs': {'config': {'gcs_bucket': gcs_bucket}}}}

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id)
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun.create_empty_run(
        pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run, instance) as context:
        store = GCSIntermediateStore(
            gcs_bucket, run_id, client=context.scoped_resources_builder.build().gcs.client
        )
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run, instance) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
Example #27
def test_fan_out_should_skip_step():
    @solid(output_defs=[
        OutputDefinition(Int, "out_1", is_required=False),
        OutputDefinition(Int, "out_2", is_required=False),
        OutputDefinition(Int, "out_3", is_required=False),
    ])
    def foo(_):
        yield Output(1, "out_1")

    @solid
    def bar(_, input_arg):
        return input_arg

    @pipeline
    def optional_outputs():
        foo_res = foo()
        # pylint: disable=no-member
        bar.alias("bar_1")(input_arg=foo_res.out_1)
        bar.alias("bar_2")(input_arg=foo_res.out_2)
        bar.alias("bar_3")(input_arg=foo_res.out_3)

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="optional_outputs",
                               run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(optional_outputs, step_keys_to_execute=["foo"]),
        InMemoryPipeline(optional_outputs),
        instance,
        pipeline_run,
    )

    assert not should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_1"
                                                                      ]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs,
                              step_keys_to_execute=["bar_2"]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs,
                              step_keys_to_execute=["bar_3"]),
        instance,
        pipeline_run.run_id,
    )
Example #28
def pipeline_run_from_execution_params(execution_params):
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    return PipelineRun(
        pipeline_name=execution_params.selector.name,
        run_id=execution_params.execution_metadata.run_id
        if execution_params.execution_metadata.run_id else make_new_run_id(),
        selector=execution_params.selector,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
        step_keys_to_execute=execution_params.step_keys,
        tags=execution_params.execution_metadata.tags,
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id=execution_params.previous_run_id,
    )
Example #29
def test_no_runs_for_failed_run():
    with tempfile.TemporaryDirectory() as temp_dir:

        instance = DagsterInstance.local_temp(temp_dir)
        instance.add_run(
            PipelineRun(
                status=PipelineRunStatus.FAILURE,
                mode="prod",
                pipeline_name="download_pipeline",
                run_config={"resources": DEFAULT_PARTITION_RESOURCE_CONFIG},
            ))
        run_requests = list(
            dbt_on_hn_download_finished(
                build_sensor_context(instance=instance)))
        assert len(run_requests) == 0
Example #30
def test_nuke():
    storage = SqliteRunStorage.mem()

    assert storage
    run_id = str(uuid.uuid4())

    storage.add_run(
        PipelineRun.create_empty_run(run_id=run_id,
                                     pipeline_name='some_pipeline'))

    assert len(storage.all_runs) == 1

    storage.wipe()

    assert list(storage.all_runs) == []