Example #1
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=failing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(run_id)
Example #2
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id='previousID',
    )
    for field in run:
        # ensure we have a test value to round trip for each field
        assert field

    exec_params = execution_params_from_pipeline_run(run)
    assert run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(
        exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert run == pipeline_run_from_execution_params(exec_params_gql)

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    exec_params = execution_params_from_pipeline_run(empty_run)
    assert empty_run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(
        exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert empty_run == pipeline_run_from_execution_params(exec_params_gql)
Example #3
def initialize_step_context(scratch_dir):
    pipeline_run = PipelineRun(
        pipeline_name='foo_pipeline',
        run_id=str(uuid.uuid4()),
        environment_dict=make_environment_dict(scratch_dir, 'external'),
        mode='external',
    )

    plan = create_execution_plan(reconstructable(define_basic_pipeline),
                                 pipeline_run.environment_dict,
                                 mode='external')

    initialization_manager = pipeline_initialization_manager(
        plan,
        pipeline_run.environment_dict,
        pipeline_run,
        DagsterInstance.ephemeral(),
    )
    for _ in initialization_manager.generate_setup_events():
        pass
    pipeline_context = initialization_manager.get_object()

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))
    step = active_execution.get_next_step()
    step_context = pipeline_context.for_step(step)
    return step_context
Example #4
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=[
            "-p",
            "executeRunInProcess",
            "-v",
            seven.json.dumps({"runId": run.run_id}),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="",
        ).strip())
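A note on the comparison above: the Kubernetes client's to_dict() emits None for every unset field, so the test strips those before dumping to YAML. A minimal sketch of what such a remove_none_recursively helper could look like (an assumption; the real helper is imported from the dagster codebase and may differ in detail):

def remove_none_recursively(obj):
    # Drop None values from nested dicts/lists so yaml.dump stays compact.
    if isinstance(obj, dict):
        return {k: remove_none_recursively(v) for k, v in obj.items() if v is not None}
    if isinstance(obj, list):
        return [remove_none_recursively(v) for v in obj if v is not None]
    return obj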
Example #5
def create_test_pipeline_execution_context(logger_defs=None):
    loggers = check.opt_dict_param(logger_defs,
                                   "logger_defs",
                                   key_type=str,
                                   value_type=LoggerDefinition)
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(name="test_legacy_context",
                                      solid_defs=[],
                                      mode_defs=[mode_def])
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context",
                               run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def,
                                           run_config=run_config)
    creation_data = create_context_creation_data(
        InMemoryPipeline(pipeline_def), execution_plan, run_config,
        pipeline_run, instance)
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return PlanExecutionContext(
        plan_data=create_plan_data(creation_data, True, executor.retries),
        execution_data=create_execution_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
        ),
        log_manager=log_manager,
        output_capture=None,
    )
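A hypothetical usage of the helper above: it hands back a throwaway execution context, so code that expects a context can be unit-tested without running a pipeline (the .log attribute is assumed from dagster's context API of this era):

context = create_test_pipeline_execution_context()
context.log.info("exercising code that needs an execution context")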
Example #6
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_graphql_k8s_job(
        run_launcher.job_config,
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='',
        ).strip())
Example #7
def test_valid_job_format(run_launcher):
    docker_image = get_test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.get_static_job_config(),
        args=["dagster", "api", "execute_run"],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="",
            env_from=ENV_FROM,
        ).strip())
Example #8
def create_test_pipeline_execution_context(logger_defs=None):
    from dagster.core.storage.intermediate_storage import build_in_mem_intermediates_storage

    loggers = check.opt_dict_param(
        logger_defs, "logger_defs", key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name="test_legacy_context", solid_defs=[], mode_defs=[mode_def]
    )
    run_config = {"loggers": {key: {} for key in loggers}}
    pipeline_run = PipelineRun(pipeline_name="test_legacy_context", run_config=run_config)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, run_config=run_config)
    creation_data = create_context_creation_data(execution_plan, run_config, pipeline_run, instance)
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor = create_executor(creation_data)

    return SystemPipelineExecutionContext(
        construct_execution_context_data(
            context_creation_data=creation_data,
            scoped_resources_builder=scoped_resources_builder,
            intermediate_storage=build_in_mem_intermediates_storage(pipeline_run.run_id),
            log_manager=log_manager,
            retries=executor.retries,
            raise_on_error=True,
        ),
        executor=executor,
        log_manager=log_manager,
    )
Example #9
def create_test_pipeline_execution_context(logger_defs=None):
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    environment_dict = {'loggers': {key: {} for key in loggers}}
    pipeline_run = PipelineRun(
        pipeline_name='test_legacy_context', environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline=pipeline_def, environment_dict=environment_dict)
    creation_data = create_context_creation_data(
        execution_plan, environment_dict, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor_config = create_executor_config(creation_data)
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_instance(instance, pipeline_run.run_id),
        ),
        log_manager=log_manager,
        executor_config=executor_config,
        raise_on_error=True,
    )
Example #10
def initialize_step_context(scratch_dir, instance):
    pipeline_run = PipelineRun(
        pipeline_name="foo_pipeline",
        run_id=str(uuid.uuid4()),
        run_config=make_run_config(scratch_dir, "external"),
        mode="external",
    )

    recon_pipeline = reconstructable(define_basic_pipeline)

    plan = create_execution_plan(recon_pipeline,
                                 pipeline_run.run_config,
                                 mode="external")

    initialization_manager = PipelineExecutionContextManager(
        recon_pipeline,
        plan,
        pipeline_run.run_config,
        pipeline_run,
        instance,
    )
    for _ in initialization_manager.prepare_context():
        pass
    pipeline_context = initialization_manager.get_context()

    step_context = pipeline_context.for_step(
        plan.get_step_by_key("return_two"))
    return step_context
Example #11
        def python_callable(ts, dag_run, **kwargs):  # pylint: disable=unused-argument
            run_id = dag_run.run_id

            # TODO: https://github.com/dagster-io/dagster/issues/1342
            redacted = construct_variables(mode, 'REDACTED', pipeline_name,
                                           run_id, ts, step_keys)
            logging.info('Executing GraphQL query: {query}\n'.format(
                query=EXECUTE_PLAN_MUTATION) + 'with variables:\n' +
                         seven.json.dumps(redacted, indent=2))
            instance = DagsterInstance.from_ref(
                instance_ref) if instance_ref else None
            if instance:
                instance.get_or_create_run(
                    PipelineRun(
                        pipeline_name=pipeline_name,
                        run_id=run_id,
                        environment_dict=environment_dict,
                        mode=mode,
                        selector=ExecutionSelector(pipeline_name),
                        reexecution_config=None,
                        step_keys_to_execute=None,
                        tags=None,
                        status=PipelineRunStatus.MANAGED,
                    ))

            events = execute_execute_plan_mutation(
                handle,
                construct_variables(mode, environment_dict, pipeline_name,
                                    run_id, ts, step_keys),
                instance_ref=instance_ref,
            )

            check_events_for_skips(events)

            return events
Example #12
    def create_run_with_snapshot(self, create_run_args):
        check.inst_param(create_run_args, 'create_run_args',
                         InstanceCreateRunArgs)

        from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

        snapshot_id = create_pipeline_snapshot_id(
            create_run_args.pipeline_snapshot)

        if not self._run_storage.has_pipeline_snapshot(snapshot_id):
            returned_snapshot_id = self._run_storage.add_pipeline_snapshot(
                create_run_args.pipeline_snapshot)

            check.invariant(snapshot_id == returned_snapshot_id)

        return self.create_run(
            PipelineRun(
                pipeline_name=create_run_args.pipeline_snapshot.name,
                pipeline_snapshot_id=snapshot_id,
                run_id=create_run_args.run_id,
                environment_dict=create_run_args.environment_dict,
                mode=create_run_args.mode,
                selector=create_run_args.selector,
                step_keys_to_execute=create_run_args.step_keys_to_execute,
                status=create_run_args.status,
                tags=create_run_args.tags,
                parent_run_id=create_run_args.parent_run_id,
                root_run_id=create_run_args.root_run_id,
            ))
Example #13
def execute_partition_set(partition_set, partition_filter, instance=None):
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    check.inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(
                partition),
            mode='default',
            tags=merge_dicts({'dagster/backfill': 'custom'},
                             partition_set.tags_for_partition(partition)),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

        instance.run_launcher.launch_run(run)
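Since partition_filter is just a callable from the candidate partitions to the subset that should run, a backfill of only the most recent partition can be expressed inline. A sketch, where my_partition_set is a hypothetical PartitionSetDefinition:

execute_partition_set(
    my_partition_set,
    partition_filter=lambda candidates: candidates[-1:],  # latest partition only
    instance=DagsterInstance.ephemeral(),
)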
Example #14
    def test_single_write_read_with_snapshot(self, storage):
        run_with_snapshot_id = "lkasjdflkjasdf"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        run_with_snapshot = PipelineRun(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id=pipeline_snapshot_id,
        )

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

        assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id

        assert serialize_pp(storage.get_pipeline_snapshot(pipeline_snapshot_id)) == serialize_pp(
            pipeline_snapshot
        )

        storage.add_run(run_with_snapshot)

        assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_run(run_with_snapshot_id)
Example #15
def test_execution_plan_wrong_run_id():
    pipeline_def = define_addy_pipeline()

    unrun_id = make_new_run_id()
    new_run_id = make_new_run_id()
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        previous_run_id=unrun_id,
    )

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=pipeline_run
    )

    with pytest.raises(DagsterRunNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=DagsterInstance.ephemeral(),
        )

    assert str(
        exc_info.value
    ) == 'Run id {} set as previous run id was not found in instance'.format(unrun_id)

    assert exc_info.value.invalid_run_id == unrun_id
Example #16
def initialize_step_context(scratch_dir):
    pipeline_run = PipelineRun(
        pipeline_name='foo_pipeline',
        run_id=str(uuid.uuid4()),
        run_config=make_run_config(scratch_dir, 'external'),
        mode='external',
    )

    plan = create_execution_plan(reconstructable(define_basic_pipeline),
                                 pipeline_run.run_config,
                                 mode='external')

    initialization_manager = PipelineExecutionContextManager(
        plan,
        pipeline_run.run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
    )
    for _ in initialization_manager.prepare_context():
        pass
    pipeline_context = initialization_manager.get_context()

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))
    step = active_execution.get_next_step()
    step_context = pipeline_context.for_step(step)
    return step_context
Example #17
def pipeline_launch_command(env, preset_name, mode, **kwargs):
    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    pipeline = create_pipeline_from_cli_args(kwargs)

    instance = DagsterInstance.get()

    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        if mode:
            raise click.UsageError('Can not use --preset with --mode.')

        preset = pipeline.get_preset(preset_name)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    run = PipelineRun(
        pipeline_name=pipeline.name,
        run_id=make_new_run_id(),
        selector=ExecutionSelector(pipeline.name,
                                   preset.solid_subset if preset else None),
        environment_dict=preset.environment_dict
        if preset else load_yaml_from_glob_list(env),
        mode=(preset.mode if preset else mode) or 'default',
        status=PipelineRunStatus.NOT_STARTED,
        tags=run_tags,
    )

    return instance.launch_run(run)
Example #18
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    last_log = instance.all_logs(run_id)[-1]

    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
Example #19
    def _construct_run_with_snapshots(
        self,
        pipeline_name,
        run_id,
        run_config,
        mode,
        solids_to_execute,
        step_keys_to_execute,
        status,
        tags,
        root_run_id,
        parent_run_id,
        pipeline_snapshot,
        execution_plan_snapshot,
        parent_pipeline_snapshot,
        solid_selection=None,
    ):

        # https://github.com/dagster-io/dagster/issues/2403
        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

        check.invariant(
            not (not pipeline_snapshot and execution_plan_snapshot),
            "It is illegal to have an execution plan snapshot and not have a pipeline snapshot. "
            "It is possible to have no execution plan snapshot since we persist runs "
            "that do not successfully compile execution plans in the scheduled case.",
        )

        pipeline_snapshot_id = (
            self._ensure_persisted_pipeline_snapshot(pipeline_snapshot, parent_pipeline_snapshot)
            if pipeline_snapshot
            else None
        )

        execution_plan_snapshot_id = (
            self._ensure_persisted_execution_plan_snapshot(
                execution_plan_snapshot, pipeline_snapshot_id, step_keys_to_execute
            )
            if execution_plan_snapshot and pipeline_snapshot_id
            else None
        )

        return PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            run_config=run_config,
            mode=mode,
            solid_selection=solid_selection,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot_id=pipeline_snapshot_id,
            execution_plan_snapshot_id=execution_plan_snapshot_id,
        )
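The invariant checked above encodes an implication: an execution plan snapshot may only exist alongside a pipeline snapshot, while a pipeline snapshot alone (or neither) is fine. A quick sketch of the truth table:

# not (not A and B) is equivalent to "B implies A": an execution plan
# snapshot requires a pipeline snapshot.
for pipeline_snapshot in (None, object()):
    for execution_plan_snapshot in (None, object()):
        legal = not (not pipeline_snapshot and execution_plan_snapshot)
        assert legal == (execution_plan_snapshot is None or pipeline_snapshot is not None)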
Example #20
def get_should_launch_run():
    return PipelineRun(
        run_id=str(uuid.uuid4()),
        status=PipelineRunStatus.SUCCESS,
        mode="prod",
        pipeline_name="download_pipeline",
        run_config={"resources": DEFAULT_PARTITION_RESOURCE_CONFIG},
    )
Example #21
def test_roundtrip_run():
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    for field in run_with_snapshot:
        # ensure we have a test value to round trip for each field
        assert field

    # The invariant that all the execution parameter structs and the
    # pipeline run can be constructed from each other is no longer
    # true. Clients of the GraphQL API cannot know the value of the
    # pipeline_snapshot_id prior to execution, because it is
    # constructed on the server. Hence these roundtrip tests
    # do not include the snapshot ids.

    run = run_with_snapshot._replace(pipeline_snapshot_id=None, execution_plan_snapshot_id=None)

    exec_params = execution_params_from_pipeline_run(run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(run, key) == value

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params = execution_params_from_pipeline_run(empty_run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(empty_run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(empty_run, key) == value
Example #22
def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    old_run_id = str(uuid.uuid4())
    environment_dict = env_with_fs(
        {'solids': {
            'add_one': {
                'inputs': {
                    'num': {
                        'value': 3
                    }
                }
            }
        }})
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )

    assert result.success

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        result.run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    ## re-execute add_two

    new_run_id = str(uuid.uuid4())

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        previous_run_id=result.run_id,
    )

    execution_plan = create_execution_plan(pipeline_def,
                                           environment_dict=environment_dict,
                                           run_config=pipeline_run)

    step_events = execute_plan(
        execution_plan.build_subset_plan(['add_two.compute']),
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        new_run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')
Example #23
    def test_write_conflicting_run_id(self, storage):
        double_run_id = "double_run_id"
        pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])

        run = PipelineRun(run_id=double_run_id, pipeline_name=pipeline_def.name)

        assert storage.add_run(run)
        with pytest.raises(DagsterRunAlreadyExists):
            storage.add_run(run)
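Because add_run raises DagsterRunAlreadyExists on a duplicate run id, a caller that may retry a write can guard with has_run, which appears in the storage tests above. A sketch (note the check-then-act is not atomic, so a true race still needs the exception handled):

def add_run_if_absent(storage, run):
    # has_run/get_run_by_id as used in the snapshot test earlier on this page.
    if storage.has_run(run.run_id):
        return storage.get_run_by_id(run.run_id)
    return storage.add_run(run)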
Example #24
def test_queued_pipeline_origin_check():
    fake_pipeline_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(ModuleCodePointer("fake", "fake"))),
            "foo_repo",
        ),
        "foo",
    )

    PipelineRun(status=PipelineRunStatus.QUEUED,
                external_pipeline_origin=fake_pipeline_origin)

    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
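PipelineRun is immutable, so with_status (used above) returns a modified copy rather than mutating in place. A minimal sketch (the import path is a guess for dagster of this vintage):

from dagster.core.storage.pipeline_run import PipelineRun, PipelineRunStatus

run = PipelineRun(pipeline_name="foo", run_id="bar", mode="default")
started = run.with_status(PipelineRunStatus.STARTED)
assert run.status == PipelineRunStatus.NOT_STARTED  # original unchanged
assert started.status == PipelineRunStatus.STARTED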
Example #25
def test_roundtrip_run():
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        solid_subset=['solid_1'],
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    for field in run_with_snapshot:
        # ensure we have a test value to round trip for each field
        assert field

    # The invariant that all the execution parameter structs and the
    # pipeline run can be constructed from each other is no longer
    # true. Clients of the GraphQL API cannot know the value of the
    # pipeline_snapshot_id prior to execution, because it is
    # constructed on the server. Hence these roundtrip tests
    # do not include the snapshot ids.

    run = run_with_snapshot._replace(pipeline_snapshot_id=None,
                                     execution_plan_snapshot_id=None)

    context = define_context_for_file(__file__, 'pipey_mcpipeface',
                                      DagsterInstance.ephemeral())

    exec_params = execution_params_from_pipeline_run(context, run)

    exec_params_gql = execution_params_from_graphql(
        context, exec_params.to_graphql_input())
    assert exec_params_gql == exec_params

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params = execution_params_from_pipeline_run(context, empty_run)

    exec_params_gql = execution_params_from_graphql(
        context, exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
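The _replace call above works because PipelineRun is a namedtuple-backed record: _replace returns a modified copy and leaves the original untouched. Illustrated with a plain namedtuple standing in for PipelineRun:

from collections import namedtuple

Run = namedtuple("Run", ["run_id", "pipeline_snapshot_id"])
r1 = Run("8675309", "some_snapshot_id")
r2 = r1._replace(pipeline_snapshot_id=None)
assert r1.pipeline_snapshot_id == "some_snapshot_id"
assert r2.pipeline_snapshot_id is None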
Example #26
def test_execute_plan_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'a': resource_a,
                    'b': resource_b
                },
                logger_defs={
                    'callback': construct_event_logger(event_callback)
                },
            )
        ],
    )
    environment_dict = {'loggers': {'callback': {}}}
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=pipeline.name,
            run_id=make_new_run_id(),
            environment_dict={'loggers': {
                'callback': {}
            }},
            mode='default',
            status=PipelineRunStatus.NOT_STARTED,
        ))

    execution_plan = create_execution_plan(pipeline, environment_dict)
    iterator = execute_plan_iterator(execution_plan,
                                     pipeline_run,
                                     instance,
                                     environment_dict=environment_dict)

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([message
                for message in messages if message == 'CLEANING A']) > 0
    assert len([message
                for message in messages if message == 'CLEANING B']) > 0
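Why iterator.close() surfaces the 'CLEANING A'/'CLEANING B' messages: closing a Python generator raises GeneratorExit at the suspended yield, which lets finally blocks (and hence dagster's resource teardown) run even though iteration stopped early. A plain-Python illustration:

def managed():
    try:
        yield "step_start"
        yield "step_success"
    finally:
        print("CLEANING")  # runs on close()

it = managed()
next(it)    # advance to the first yield
it.close()  # raises GeneratorExit inside managed(); prints CLEANING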
Example #27
def start_pipeline_execution(graphene_info, execution_params, reexecution_config):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)
    check.opt_inst_param(reexecution_config, 'reexecution_config', ReexecutionConfig)

    instance = graphene_info.context.instance

    dauphin_pipeline = get_dauphin_pipeline_from_selector_or_raise(
        graphene_info, execution_params.selector
    )

    get_validated_config(
        graphene_info,
        dauphin_pipeline,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        dauphin_pipeline.get_dagster_pipeline(),
        execution_params.environment_dict,
        run_config=RunConfig(mode=execution_params.mode),
    )

    _check_start_pipeline_execution_errors(
        graphene_info, execution_params, execution_plan, reexecution_config
    )

    run = instance.create_run(
        PipelineRun(
            pipeline_name=dauphin_pipeline.get_dagster_pipeline().name,
            run_id=execution_params.execution_metadata.run_id
            if execution_params.execution_metadata.run_id
            else make_new_run_id(),
            selector=execution_params.selector,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            reexecution_config=reexecution_config,
            step_keys_to_execute=execution_params.step_keys,
            tags=execution_params.execution_metadata.tags,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        dauphin_pipeline.get_dagster_pipeline(),
        run,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineExecutionSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run)
    )
Example #28
def test_no_runs_for_different_mode():
    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        instance.add_run(
            PipelineRun(status=PipelineRunStatus.SUCCESS,
                        mode="xyz",
                        pipeline_name="download_pipeline"))
        run_requests = list(
            dbt_on_hn_download_finished(
                build_sensor_context(instance=instance)))
        assert len(run_requests) == 0
Example #29
def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            "requests": {
                "cpu": "250m",
                "memory": "64Mi"
            },
            "limits": {
                "cpu": "500m",
                "memory": "2560Mi"
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=[
            "-p",
            "executeRunInProcess",
            "-v",
            seven.json.dumps({"runId": run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip())
Example #30
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            'requests': {
                'cpu': '250m',
                'memory': '64Mi'
            },
            'limits': {
                'cpu': '500m',
                'memory': '2560Mi'
            },
        })
    })
    resources = get_k8s_resource_requirements(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
        ).strip())