Example #1
def test_runs_over_time():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline("foo_pipeline"),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_B"}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_context_for_file(__file__,
                                                    "get_repo_at_time_1",
                                                    instance)

        result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
        assert result.data

        t1_runs = {
            run["runId"]: run
            for run in result.data["pipelineRunsOrError"]["results"]
        }

        assert t1_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        assert t1_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        assert t1_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }

        assert t1_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }

        context_at_time_2 = define_context_for_file(__file__,
                                                    "get_repo_at_time_2",
                                                    instance)

        result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
        assert result.data

        t2_runs = {
            run["runId"]: run
            for run in result.data["pipelineRunsOrError"]["results"]
        }

        assert t2_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        assert t2_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        # pipeline name changed
        assert t2_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }
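
The shape of ALL_RUNS_QUERY is assumed by this test rather than shown. Below is a minimal sketch consistent with the fields the assertions read (runId, plus pipeline.__typename, name, and solidSelection); the real query in the dagster test suite may select more fields and handle more union cases:

ALL_RUNS_QUERY = """
{
  pipelineRunsOrError {
    ... on PipelineRuns {
      results {
        runId
        pipeline {
          __typename
          name
          solidSelection
        }
      }
    }
  }
}
"""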
Example #2
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        },
        'execution': {
            'multiprocess': {}
        },
        'storage': {
            'filesystem': {}
        },
    }

    run_id = make_new_run_id()

    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)

    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS
Example #3
def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(
            __file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    context = DagsterGraphQLContext(
        locations=[InProcessRepositoryLocation(recon_repo)],
        instance=instance,
    )

    selector = get_legacy_pipeline_selector(context, 'hammer_pipeline')

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'runConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {
                        'config': {
                            'cluster': {
                                'local': {}
                            }
                        }
                    }
                },
            },
            'selector': selector,
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=LAUNCH_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception('{}'.format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data['launchPipelineExecution']['run'][
        'runId']

    context.drain_outstanding_executions()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
Example #4
def test_successful_two_part_execute_plan(snapshot):
    run_id = make_new_run_id()
    instance = DagsterInstance.local_temp()
    instance.create_empty_run(run_id, 'csv_hello_world')
    result_one = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_solid.compute'],
                'executionMetadata': {
                    'runId': run_id
                },
                'mode': 'default',
            }
        },
    )

    assert result_one.data['executePlan']['__typename'] == 'ExecutePlanSuccess'

    snapshot.assert_match(clean_log_messages(result_one.data))

    result_two = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {
                    'runId': run_id
                },
                'mode': 'default',
            }
        },
    )

    query_result = result_two.data['executePlan']
    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'csv_hello_world'
    assert query_result['hasFailures'] is False
    step_events = query_result['stepEvents']
    assert [se['__typename'] for se in step_events] == [
        'EngineEvent',
        'ExecutionStepStartEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepSuccessEvent',
        'EngineEvent',
    ]
    assert step_events[1]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[2]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[3]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[4]['outputName'] == 'result'
    assert step_events[5]['step']['key'] == 'sum_sq_solid.compute'

    snapshot.assert_match(clean_log_messages(result_two.data))

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]''')

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        run_id)
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (str(
        store.get_intermediate(None, 'sum_sq_solid.compute',
                               PoorMansDataFrame).obj) == expected_value_repr)
Example #5
def test_filesystem_persist_one_run():
    do_test_single_write_read(DagsterInstance.local_temp())
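
do_test_single_write_read is a shared helper defined elsewhere in the test suite. A hypothetical sketch of what such a helper could look like, reusing only run-storage calls that appear in other examples on this page (the helper body and pipeline name below are illustrative):

def do_test_single_write_read(instance):
    # Write a single run into run storage and read it back.
    run_id = make_new_run_id()
    instance.create_run(
        PipelineRun.create_empty_run(pipeline_name='nonce_pipeline',
                                     run_id=run_id,
                                     environment_dict={}))
    stored_run = instance.get_run(run_id)
    assert stored_run.run_id == run_id
    assert stored_run.pipeline_name == 'nonce_pipeline'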
Example #6
def test_execute_hammer_through_dagit():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        script_relative_path(
            '../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline')
    instance = DagsterInstance.local_temp()

    execution_manager = SubprocessExecutionManager(instance)

    context = DagsterGraphQLContext(handle=handle,
                                    execution_manager=execution_manager,
                                    instance=instance)

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {}
                }
            },
            'selector': {
                'name': handle.build_pipeline_definition().name
            },
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    run_id = start_pipeline_result.data['startPipelineExecution']['run'][
        'runId']

    context.execution_manager.join()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    assert 'PipelineProcessStartEvent' in messages
    assert 'PipelineProcessStartedEvent' in messages
    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
    assert 'PipelineProcessExitedEvent' in messages
Example #7
def test_interrupt_multiproc():
    with seven.TemporaryDirectory() as tempdir:
        file_1 = os.path.join(tempdir, "file_1")
        file_2 = os.path.join(tempdir, "file_2")
        file_3 = os.path.join(tempdir, "file_3")
        file_4 = os.path.join(tempdir, "file_4")

        # launch a thread that waits until all four files are written, then sends an interrupt
        Thread(target=_send_kbd_int, args=([file_1, file_2, file_3,
                                            file_4], )).start()

        results = []
        try:
            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                    reconstructable(write_files_pipeline),
                    run_config={
                        "solids": {
                            "write_1": {
                                "config": {
                                    "tempfile": file_1
                                }
                            },
                            "write_2": {
                                "config": {
                                    "tempfile": file_2
                                }
                            },
                            "write_3": {
                                "config": {
                                    "tempfile": file_3
                                }
                            },
                            "write_4": {
                                "config": {
                                    "tempfile": file_4
                                }
                            },
                        },
                        "execution": {
                            "multiprocess": {
                                "config": {
                                    "max_concurrent": 4
                                }
                            }
                        },
                        "storage": {
                            "filesystem": {}
                        },
                    },
                    instance=DagsterInstance.local_temp(tempdir=tempdir),
            ):
                results.append(result)
            assert False  # should never reach
        except (DagsterSubprocessError, KeyboardInterrupt):
            pass

        assert [result.event_type for result in results
                ].count(DagsterEventType.STEP_FAILURE) == 4
        assert DagsterEventType.PIPELINE_FAILURE in [
            result.event_type for result in results
        ]
Example #8
def test_retry_early_terminate():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        context = define_test_subprocess_context(instance=instance)
        run_id = make_new_run_id()
        execute_dagster_graphql(
            context,
            START_PIPELINE_EXECUTION_QUERY,
            variables={
                'executionParams': {
                    'mode': 'default',
                    'selector': {
                        'name': 'retry_multi_input_early_terminate_pipeline'
                    },
                    'environmentConfigData': {
                        'solids': {
                            'get_input_one': {
                                'config': {
                                    'wait_to_terminate': True
                                }
                            },
                            'get_input_two': {
                                'config': {
                                    'wait_to_terminate': True
                                }
                            },
                        },
                        'storage': {
                            'filesystem': {}
                        },
                    },
                    'executionMetadata': {
                        'runId': run_id
                    },
                }
            },
        )
        # Wait until the first step succeeded
        while instance.get_run_stats(run_id).steps_succeeded < 1:
            sleep(0.1)
        # Terminate the current pipeline run at the second step
        context.execution_manager.terminate(run_id)

        records = instance.all_logs(run_id)

        # The first step should succeed, the second should fail or not start,
        # and the following steps should not appear in records
        assert step_did_succeed_in_records(records, 'return_one.compute')
        assert any([
            step_did_fail_in_records(records, 'get_input_one.compute'),
            step_did_not_run_in_records(records, 'get_input_one.compute'),
        ])
        assert step_did_not_run_in_records(records, 'get_input_two.compute')
        assert step_did_not_run_in_records(records, 'sum_inputs.compute')

        # Start retry
        new_run_id = make_new_run_id()

        execute_dagster_graphql(
            context,
            START_PIPELINE_REEXECUTION_QUERY,
            variables={
                'executionParams': {
                    'mode': 'default',
                    'selector': {
                        'name': 'retry_multi_input_early_terminate_pipeline'
                    },
                    'environmentConfigData': {
                        'solids': {
                            'get_input_one': {
                                'config': {
                                    'wait_to_terminate': False
                                }
                            },
                            'get_input_two': {
                                'config': {
                                    'wait_to_terminate': False
                                }
                            },
                        },
                        'storage': {
                            'filesystem': {}
                        },
                    },
                    'executionMetadata': {
                        'runId': new_run_id,
                        'rootRunId': run_id,
                        'parentRunId': run_id,
                        'tags': [{
                            'key': RESUME_RETRY_TAG,
                            'value': 'true'
                        }],
                    },
                }
            },
        )
        # Wait until the run is finished
        while context.execution_manager.is_process_running(new_run_id):
            sleep(0.1)

        retry_records = instance.all_logs(new_run_id)
        # The first step should not run and the other three steps should succeed in retry
        assert step_did_not_run_in_records(retry_records, 'return_one.compute')
        assert step_did_succeed_in_records(retry_records,
                                           'get_input_one.compute')
        assert step_did_succeed_in_records(retry_records,
                                           'get_input_two.compute')
        assert step_did_succeed_in_records(retry_records, 'sum_inputs.compute')
Example #9
def create_local_temp_instance():
    yield DagsterInstance.local_temp()
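
This generator reads like the body of a pytest fixture whose decorator was stripped during extraction. Under that assumption, a minimal sketch of how it would be declared and consumed:

import pytest

@pytest.fixture(name='instance')
def create_local_temp_instance():
    yield DagsterInstance.local_temp()

def test_uses_instance(instance):
    # Each test receives a throwaway local instance; fresh temp instances
    # start with no runs (compare Example #25).
    assert list(instance.all_runs) == []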
Example #10
def test_run_groups_over_time():
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline("foo_pipeline"),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_B"}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance)

        result = execute_dagster_graphql(context_at_time_1,
                                         ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4

        t1_runs = {
            run["runId"]: run
            for group in result.data["runGroupsOrError"]["results"]
            for run in group["runs"]
        }

        # test full_evolve_run_id
        assert t1_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        # test foo_run_id
        assert t1_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        # test evolve_a_run_id
        assert t1_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # test evolve_b_run_id
        assert t1_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }

        context_at_time_2 = define_out_of_process_context(
            __file__, "get_repo_at_time_2", instance)

        result = execute_dagster_graphql(context_at_time_2,
                                         ALL_RUN_GROUPS_QUERY)
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4

        t2_runs = {
            run["runId"]: run
            for group in result.data["runGroupsOrError"]["results"]
            for run in group["runs"]
        }

        # test full_evolve_run_id
        assert t2_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        # test evolve_a_run_id
        assert t2_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # names same
        assert (t1_runs[full_evolve_run_id]["pipeline"]["name"] ==
                t2_runs[evolve_a_run_id]["pipeline"]["name"])

        # snapshots differ
        assert (t1_runs[full_evolve_run_id]["pipelineSnapshotId"] !=
                t2_runs[evolve_a_run_id]["pipelineSnapshotId"])

        # pipeline name changed
        assert t2_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }
Example #11
def test_retry_pipeline_execution():
    context = define_test_context(instance=DagsterInstance.local_temp())
    result = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {'name': 'eventually_successful'},
                'environmentConfigData': retry_config(0),
            }
        },
    )

    run_id = result.data['startPipelineExecution']['run']['runId']
    logs = sync_get_all_logs_for_run(context, run_id)['pipelineRunLogs']['messages']
    assert step_did_succeed(logs, 'spawn.compute')
    assert step_did_fail(logs, 'fail.compute')
    assert step_did_skip(logs, 'fail_2.compute')
    assert step_did_skip(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

    retry_one = execute_dagster_graphql(
        context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {'name': 'eventually_successful'},
                'environmentConfigData': retry_config(1),
                'executionMetadata': {
                    'rootRunId': run_id,
                    'parentRunId': run_id,
                    'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
                },
            }
        },
    )

    run_id = retry_one.data['startPipelineReexecution']['run']['runId']
    logs = sync_get_all_logs_for_run(context, run_id)['pipelineRunLogs']['messages']
    assert step_did_not_run(logs, 'spawn.compute')
    assert step_did_succeed(logs, 'fail.compute')
    assert step_did_fail(logs, 'fail_2.compute')
    assert step_did_skip(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

    retry_two = execute_dagster_graphql(
        context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {'name': 'eventually_successful'},
                'environmentConfigData': retry_config(2),
                'executionMetadata': {
                    'rootRunId': run_id,
                    'parentRunId': run_id,
                    'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
                },
            }
        },
    )

    run_id = retry_two.data['startPipelineReexecution']['run']['runId']
    logs = sync_get_all_logs_for_run(context, run_id)['pipelineRunLogs']['messages']

    assert step_did_not_run(logs, 'spawn.compute')
    assert step_did_not_run(logs, 'fail.compute')
    assert step_did_succeed(logs, 'fail_2.compute')
    assert step_did_fail(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

    retry_three = execute_dagster_graphql(
        context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {'name': 'eventually_successful'},
                'environmentConfigData': retry_config(3),
                'executionMetadata': {
                    'rootRunId': run_id,
                    'parentRunId': run_id,
                    'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
                },
            }
        },
    )

    run_id = retry_three.data['startPipelineReexecution']['run']['runId']
    logs = sync_get_all_logs_for_run(context, run_id)['pipelineRunLogs']['messages']

    assert step_did_not_run(logs, 'spawn.compute')
    assert step_did_not_run(logs, 'fail.compute')
    assert step_did_not_run(logs, 'fail_2.compute')
    assert step_did_succeed(logs, 'fail_3.compute')
    assert step_did_succeed(logs, 'reset.compute')
Example #12
def graphql_context():
    with seven.TemporaryDirectory() as temp_dir:
        yield define_test_context(DagsterInstance.local_temp(temp_dir))
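
As with Example #9, this is presumably decorated with @pytest.fixture in the source file; a test then receives the context by naming the fixture as a parameter. A sketch (the test name is illustrative):

def test_all_runs(graphql_context):
    result = execute_dagster_graphql(graphql_context, ALL_RUNS_QUERY)
    assert result.data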
Example #13
def test_runs_over_time():
    instance = DagsterInstance.local_temp()

    repo_1 = get_repo_at_time_1()

    full_evolve_run_id = execute_pipeline(
        repo_1.get_pipeline('evolving_pipeline'), instance=instance).run_id
    foo_run_id = execute_pipeline(repo_1.get_pipeline('foo_pipeline'),
                                  instance=instance).run_id
    evolve_a_run_id = execute_pipeline(
        repo_1.get_pipeline('evolving_pipeline').build_sub_pipeline(
            ['solid_A']),
        instance=instance).run_id
    evolve_b_run_id = execute_pipeline(
        repo_1.get_pipeline('evolving_pipeline').build_sub_pipeline(
            ['solid_B']),
        instance=instance).run_id

    context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1',
                                                instance)

    result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
    assert result.data

    t1_runs = {
        run['runId']: run
        for run in result.data['pipelineRunsOrError']['results']
    }

    # test full_evolve_run_id
    assert t1_runs[full_evolve_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[full_evolve_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': None,
    }

    # test foo_run_id
    assert t1_runs[foo_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[foo_run_id]['executionSelection'] == {
        'name': 'foo_pipeline',
        'solidSubset': None,
    }

    # test evolve_a_run_id
    assert t1_runs[evolve_a_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[evolve_a_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_A'],
    }
    assert t1_runs[evolve_a_run_id]['pipelineSnapshotId']

    # test evolve_b_run_id
    assert t1_runs[evolve_b_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t1_runs[evolve_b_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_B'],
    }

    context_at_time_2 = define_context_for_file(__file__, 'get_repo_at_time_2',
                                                instance)

    result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
    assert result.data

    t2_runs = {
        run['runId']: run
        for run in result.data['pipelineRunsOrError']['results']
    }

    # test full_evolve_run_id
    assert t2_runs[full_evolve_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t2_runs[full_evolve_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': None,
    }

    # test evolve_a_run_id
    assert t2_runs[evolve_a_run_id]['pipeline']['__typename'] == 'Pipeline'
    assert t2_runs[evolve_a_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_A'],
    }
    assert t2_runs[evolve_a_run_id]['pipelineSnapshotId']

    # names same
    assert (t1_runs[full_evolve_run_id]['pipeline']['name'] ==
            t2_runs[evolve_a_run_id]['pipeline']['name'])

    # snapshots differ
    assert (t1_runs[full_evolve_run_id]['pipelineSnapshotId'] !=
            t2_runs[evolve_a_run_id]['pipelineSnapshotId'])

    # pipeline name changed
    assert t2_runs[foo_run_id]['pipeline']['__typename'] == 'UnknownPipeline'
    assert t2_runs[foo_run_id]['executionSelection'] == {
        'name': 'foo_pipeline',
        'solidSubset': None,
    }
    # subset no longer valid - b renamed
    assert t2_runs[evolve_b_run_id]['pipeline'][
        '__typename'] == 'UnknownPipeline'
    assert t2_runs[evolve_b_run_id]['executionSelection'] == {
        'name': 'evolving_pipeline',
        'solidSubset': ['solid_B'],
    }
Example #14
def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {
                    'node_a': {
                        'config': {
                            'foo': 'baz'
                        }
                    },
                    'node_b': {
                        'config': {
                            'bar': 3
                        }
                    }
                }
            }
        }
    }

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(reconstructable(get_composite_pipeline),
                                       pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {
                    'node_a': {
                        'config': {
                            'foo': 'baz'
                        }
                    },
                    'node_b': {
                        'config': {
                            'bar': 3
                        }
                    }
                }
            }
        },
        'execution': {
            'multiprocess': {}
        },
        'storage': {
            'filesystem': {}
        },
    }

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(reconstructable(get_composite_pipeline),
                                       pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS
Example #15
def test_max_concurrency_one():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()

    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()
        execution_manager = QueueingSubprocessExecutionManager(
            instance, max_concurrent_runs=1)

        run_one = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': file_one
                            }
                        }
                    }
                },
            ))
        run_two = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_two,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': file_two
                            }
                        }
                    }
                },
            ))

        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           run_one, instance)
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           run_two, instance)

        while not os.path.exists(file_one):
            execution_manager.check()
            time.sleep(0.1)

        assert execution_manager.is_active(run_id_one)
        assert not execution_manager.is_active(run_id_two)
        assert not os.path.exists(file_two)

        assert execution_manager.terminate(run_id_one)

        while not os.path.exists(file_two):
            execution_manager.check()
            time.sleep(0.1)

        assert not execution_manager.is_active(run_id_one)
        assert execution_manager.is_active(run_id_two)
        assert execution_manager.terminate(run_id_two)
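
The busy-wait loops above recur throughout these tests. A small helper, not part of the dagster API and purely illustrative, that bounds the wait with a timeout makes such tests fail fast instead of hanging:

import os
import time

def wait_for_file(path, timeout=30.0, poll=0.1, on_poll=None):
    # Poll until `path` exists, optionally invoking a callback (for example
    # execution_manager.check) on each iteration; fail if it never appears.
    deadline = time.time() + timeout
    while not os.path.exists(path):
        if on_poll is not None:
            on_poll()
        if time.time() > deadline:
            raise AssertionError('timed out waiting for {}'.format(path))
        time.sleep(poll)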
Example #16
def test_get_runs_over_graphql():
    from .utils import (
        define_test_context,
        sync_execute_get_run_log_data,
    )

    payload_one = sync_execute_get_run_log_data({
        'executionParams': {
            'selector': {
                'name': 'multi_mode_with_resources'
            },
            'mode': 'add_mode',
            'environmentConfigData': {
                'resources': {
                    'op': {
                        'config': 2
                    }
                }
            },
        }
    })
    run_id_one = payload_one['run']['runId']

    payload_two = sync_execute_get_run_log_data({
        'executionParams': {
            'selector': {
                'name': 'multi_mode_with_resources'
            },
            'mode': 'add_mode',
            'environmentConfigData': {
                'resources': {
                    'op': {
                        'config': 3
                    }
                }
            },
        }
    })

    run_id_two = payload_two['run']['runId']

    read_context = define_test_context(instance=DagsterInstance.local_temp())

    result = execute_dagster_graphql(
        read_context,
        RUNS_QUERY,
        variables={'name': 'multi_mode_with_resources'})

    # delete the second run
    result = execute_dagster_graphql(read_context,
                                     DELETE_RUN_MUTATION,
                                     variables={'runId': run_id_two})
    assert result.data['deletePipelineRun'][
        '__typename'] == 'DeletePipelineRunSuccess'
    assert result.data['deletePipelineRun']['runId'] == run_id_two

    # query it back out
    result = execute_dagster_graphql(
        read_context,
        RUNS_QUERY,
        variables={'name': 'multi_mode_with_resources'})

    # first is the same
    run_one_data = _get_runs_data(result, run_id_one)
    assert run_one_data

    # second is gone
    run_two_data = _get_runs_data(result, run_id_two)
    assert run_two_data is None

    # try to delete the second run again
    execute_dagster_graphql(read_context,
                            DELETE_RUN_MUTATION,
                            variables={'runId': run_id_two})

    result = execute_dagster_graphql(read_context,
                                     DELETE_RUN_MUTATION,
                                     variables={'runId': run_id_two})
    assert result.data['deletePipelineRun'][
        '__typename'] == 'PipelineRunNotFoundError'
Example #17
def sqlite_instance_with_manager_disabled():
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance.local_temp(
            tempdir=temp_dir, overrides={'dagit': {'execution_manager': {'disabled': True}}}
        )
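
If this generator is consumed outside of pytest, contextlib gives it the same with-statement ergonomics. A sketch under that assumption (the wrapper name is hypothetical):

from contextlib import contextmanager

@contextmanager
def instance_with_manager_disabled():
    # Same body as above, wrapped so callers can write
    # `with instance_with_manager_disabled() as instance: ...`
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance.local_temp(
            tempdir=temp_dir,
            overrides={'dagit': {'execution_manager': {'disabled': True}}})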
Example #18
def test_filesystem_persist_one_run():
    with seven.TemporaryDirectory() as temp_dir:
        do_test_single_write_read(DagsterInstance.local_temp(temp_dir))
Example #19
def test_run_groups_over_time():
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline'), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline('foo_pipeline'),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').get_pipeline_subset_def(
                {'solid_A'}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').get_pipeline_subset_def(
                {'solid_B'}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_context_for_file(__file__,
                                                    'get_repo_at_time_1',
                                                    instance)

        result = execute_dagster_graphql(context_at_time_1,
                                         ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert 'runGroupsOrError' in result.data
        assert 'results' in result.data['runGroupsOrError']
        assert len(result.data['runGroupsOrError']['results']) == 4

        t1_runs = {
            run['runId']: run
            for group in result.data['runGroupsOrError']['results']
            for run in group['runs']
        }

        # test full_evolve_run_id
        assert t1_runs[full_evolve_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSelection': None,
        }

        # test foo_run_id
        assert t1_runs[foo_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'foo_pipeline',
            'solidSelection': None,
        }

        # test evolve_a_run_id
        assert t1_runs[evolve_a_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSelection': ['solid_A'],
        }
        assert t1_runs[evolve_a_run_id]['pipelineSnapshotId']

        # test evolve_b_run_id
        assert t1_runs[evolve_b_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSelection': ['solid_B'],
        }

        context_at_time_2 = define_context_for_file(__file__,
                                                    'get_repo_at_time_2',
                                                    instance)

        result = execute_dagster_graphql(context_at_time_2,
                                         ALL_RUN_GROUPS_QUERY)
        assert 'runGroupsOrError' in result.data
        assert 'results' in result.data['runGroupsOrError']
        assert len(result.data['runGroupsOrError']['results']) == 4

        t2_runs = {
            run['runId']: run
            for group in result.data['runGroupsOrError']['results']
            for run in group['runs']
        }

        # test full_evolve_run_id
        assert t2_runs[full_evolve_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSelection': None,
        }

        # test evolve_a_run_id
        assert t2_runs[evolve_a_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSelection': ['solid_A'],
        }
        assert t2_runs[evolve_a_run_id]['pipelineSnapshotId']

        # names same
        assert (t1_runs[full_evolve_run_id]['pipeline']['name'] ==
                t2_runs[evolve_a_run_id]['pipeline']['name'])

        # snapshots differ
        assert (t1_runs[full_evolve_run_id]['pipelineSnapshotId'] !=
                t2_runs[evolve_a_run_id]['pipelineSnapshotId'])

        # pipeline name changed
        assert t2_runs[foo_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'foo_pipeline',
            'solidSelection': None,
        }
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSelection': ['solid_B'],
        }
Example #20
def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(
            __file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    execution_manager = SubprocessExecutionManager(instance)

    context = DagsterGraphQLContext(
        environments=[
            InProcessDagsterEnvironment(
                recon_repo,
                execution_manager=execution_manager,
            )
        ],
        instance=instance,
    )

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {}
                }
            },
            'selector': {
                'name': 'hammer_pipeline'
            },
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception('{}'.format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data['startPipelineExecution']['run'][
        'runId']

    context.legacy_environment.execution_manager.join()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
Example #21
File: test_smoke.py Project: G9999/dagster
                pipelines {
                    name
                }
            }
        }
    }
}
"""


@pytest.mark.parametrize(
    "instance",
    [
        DagsterInstance.ephemeral(),
        DagsterInstance.local_temp(
            overrides={"opt_in": {
                "local_servers": True
            }}),
    ],
)
def test_smoke_app(instance):
    with get_workspace_from_kwargs(
            dict(module_name="dagit_tests.toy.bar_repo", definition="bar"),
            instance) as workspace:

        flask_app = app.create_app_from_workspace(workspace, instance)
        client = flask_app.test_client()

        result = client.post(
            "/graphql",
            data={"query": SMOKE_TEST_QUERY},
        )
Example #22
def test_interrupt_multiproc():
    with seven.TemporaryDirectory() as tempdir:
        file_1 = os.path.join(tempdir, 'file_1')
        file_2 = os.path.join(tempdir, 'file_2')
        file_3 = os.path.join(tempdir, 'file_3')
        file_4 = os.path.join(tempdir, 'file_4')

        # launch a thread that waits until all four files are written, then sends an interrupt
        Thread(target=_send_kbd_int, args=([file_1, file_2, file_3,
                                            file_4], )).start()

        results = []
        try:
            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                    ExecutionTargetHandle.for_pipeline_python_file(
                        __file__,
                        'write_files_pipeline').build_pipeline_definition(),
                    environment_dict={
                        'solids': {
                            'write_1': {
                                'config': {
                                    'tempfile': file_1
                                }
                            },
                            'write_2': {
                                'config': {
                                    'tempfile': file_2
                                }
                            },
                            'write_3': {
                                'config': {
                                    'tempfile': file_3
                                }
                            },
                            'write_4': {
                                'config': {
                                    'tempfile': file_4
                                }
                            },
                        },
                        'execution': {
                            'multiprocess': {
                                'config': {
                                    'max_concurrent': 4
                                }
                            }
                        },
                        'storage': {
                            'filesystem': {}
                        },
                    },
                    instance=DagsterInstance.local_temp(),
            ):
                results.append(result.event_type)
            assert False  # should never reach
        except (DagsterSubprocessError, KeyboardInterrupt):
            pass

        assert results.count(DagsterEventType.STEP_FAILURE) == 4
        assert DagsterEventType.PIPELINE_FAILURE in results
Example #23
def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(
            __file__, "../../../dagster-test/dagster_test/toys/hammer.py"),
        "hammer_pipeline",
    )
    instance = DagsterInstance.local_temp()

    context = DagsterGraphQLContext(
        workspace=Workspace([
            RepositoryLocationHandle.create_in_process_location(
                recon_repo.pointer)
        ]),
        instance=instance,
    )

    selector = infer_pipeline_selector(context, "hammer_pipeline")

    executor = SyncExecutor()

    variables = {
        "executionParams": {
            "runConfigData": {
                "storage": {
                    "filesystem": {}
                },
                "execution": {
                    "dask": {
                        "config": {
                            "cluster": {
                                "local": {}
                            }
                        }
                    }
                },
            },
            "selector": selector,
            "mode": "default",
        }
    }

    start_pipeline_result = graphql(
        request_string=LAUNCH_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception("{}".format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data["launchPipelineExecution"]["run"][
        "runId"]

    context.drain_outstanding_executions()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={"runId": run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x["__typename"]
        for x in subscribe_results[0].data["pipelineRunLogs"]["messages"]
    ]

    assert "PipelineStartEvent" in messages
    assert "PipelineSuccessEvent" in messages
Example #24
def test_run_not_found():
    context = define_test_subprocess_context(DagsterInstance.local_temp())
    result = execute_dagster_graphql(context, RUN_CANCELLATION_QUERY, variables={'runId': 'nope'})
    assert result.data['cancelPipelineExecution']['__typename'] == 'PipelineRunNotFoundError'
Example #25
def test_empty_storage():
    instance = DagsterInstance.local_temp()
    assert list(instance.all_runs) == []
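
A natural companion check, sketched here with only calls that appear in other examples on this page (some_pipeline is a stand-in for any pipeline definition), is that a run becomes visible on the same instance after execute_pipeline:

def check_storage_sees_new_run(some_pipeline):
    instance = DagsterInstance.local_temp()
    assert list(instance.all_runs) == []
    run_id = execute_pipeline(some_pipeline, instance=instance).run_id
    assert [run.run_id for run in instance.all_runs] == [run_id]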
Example #26
def _make_airflow_dag(
    recon_repo,
    pipeline_name,
    run_config=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    check.str_param(pipeline_name, "pipeline_name")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode")
    # Default to use the (persistent) system temp directory rather than a TemporaryDirectory,
    # which would not be consistent between Airflow task invocations.

    if instance is None:
        if is_dagster_home_set():
            instance = DagsterInstance.get()
        else:
            instance = DagsterInstance.local_temp(
                tempdir=seven.get_system_temp_directory())

    check.inst_param(instance, "instance", DagsterInstance)

    # Only used for Airflow; internally we continue to use pipeline.name
    dag_id = check.opt_str_param(dag_id, "dag_id",
                                 _rename_for_airflow(pipeline_name))

    dag_description = check.opt_str_param(dag_description, "dag_description",
                                          _make_dag_description(pipeline_name))
    check.subclass_param(operator, "operator", BaseOperator)

    dag_kwargs = dict(
        {"default_args": DEFAULT_ARGS},
        **check.opt_dict_param(dag_kwargs, "dag_kwargs", key_type=str),
    )

    op_kwargs = check.opt_dict_param(op_kwargs, "op_kwargs", key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = recon_repo.get_definition().get_pipeline(pipeline_name)

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, run_config, mode=mode)

    tasks = {}

    coalesced_plan = coalesce_execution_steps(execution_plan)

    for solid_handle, solid_steps in coalesced_plan.items():
        step_keys = [step.key for step in solid_steps]

        operator_parameters = DagsterOperatorParameters(
            recon_repo=recon_repo,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode=mode,
            task_id=solid_handle,
            step_keys=step_keys,
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()),
        )
        task = operator(operator_parameters)

        tasks[solid_handle] = task

        for solid_step in solid_steps:
            for step_input in solid_step.step_inputs:
                for key in step_input.dependency_keys:
                    prev_solid_handle = execution_plan.get_step_by_key(
                        key).solid_handle.to_string()
                    if solid_handle != prev_solid_handle:
                        tasks[prev_solid_handle].set_downstream(task)

    return (dag,
            [tasks[solid_handle] for solid_handle in coalesced_plan.keys()])
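
A hypothetical call site for _make_airflow_dag; every name below (paths, repository and pipeline names, schedule) is illustrative rather than taken from the source:

dag, tasks = _make_airflow_dag(
    recon_repo=ReconstructableRepository.for_file(
        file_relative_path(__file__, "my_repo.py"),  # illustrative path
        "my_repository",                             # illustrative target
    ),
    pipeline_name="my_pipeline",
    run_config={"storage": {"filesystem": {}}},
    dag_kwargs={"schedule_interval": None},
)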
Example #27
def test_two_runs_running():
    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    with get_temp_file_locations(2) as files:
        file_one, file_two = files  # pylint: disable=unbalanced-tuple-unpacking

        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': file_one
                            }
                        }
                    }
                },
            ))
        execution_manager.execute_pipeline(handle,
                                           infinite_loop_pipeline,
                                           pipeline_run_one,
                                           instance,
                                           raise_on_error=False)

        pipeline_run_two = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_two,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': file_two
                            }
                        }
                    }
                },
            ))

        execution_manager.execute_pipeline(handle,
                                           infinite_loop_pipeline,
                                           pipeline_run_two,
                                           instance,
                                           raise_on_error=False)

        # ensure both runs have begun execution
        while not (os.path.exists(file_one) and os.path.exists(file_two)):
            time.sleep(0.1)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

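        # Terminating the first run must leave the second run untouched.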
        assert execution_manager.terminate(run_id_one)

        assert not execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_two)

        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.is_process_running(run_id_two)
Example #28
def test_runs_over_time():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline'), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline('foo_pipeline'),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').subset_for_execution(
                ['solid_A']),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline('evolving_pipeline').subset_for_execution(
                ['solid_B']),
            instance=instance,
        ).run_id

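        # Query all runs against the repository as it existed at time 1.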
        context_at_time_1 = define_context_for_file(__file__,
                                                    'get_repo_at_time_1',
                                                    instance)

        result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
        assert result.data

        t1_runs = {
            run['runId']: run
            for run in result.data['pipelineRunsOrError']['results']
        }

        assert t1_runs[full_evolve_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSubset': None,
        }

        assert t1_runs[foo_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'foo_pipeline',
            'solidSubset': None,
        }

        assert t1_runs[evolve_a_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSubset': ['solid_A'],
        }

        assert t1_runs[evolve_b_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSubset': ['solid_B'],
        }

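        # Reload the repository at time 2; historical runs keep the snapshots
        # they were executed with.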
        context_at_time_2 = define_context_for_file(__file__,
                                                    'get_repo_at_time_2',
                                                    instance)

        result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
        assert result.data

        t2_runs = {
            run['runId']: run
            for run in result.data['pipelineRunsOrError']['results']
        }

        assert t2_runs[full_evolve_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSubset': None,
        }

        assert t2_runs[evolve_a_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSubset': ['solid_A'],
        }
        # the pipeline's name changed in the repo at time 2, but the run still
        # reports the name it ran with
        assert t2_runs[foo_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'foo_pipeline',
            'solidSubset': None,
        }
        # solid_B was renamed at time 2, so this subset is no longer valid in
        # the current repo; the run still records its original selection
        assert t2_runs[evolve_b_run_id]['pipeline'] == {
            '__typename': 'PipelineSnapshot',
            'name': 'evolving_pipeline',
            'solidSubset': ['solid_B'],
        }
Example #29
def test_retry_pipeline_execution():
    context = define_test_context(instance=DagsterInstance.local_temp())
    result = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {
                    'name': 'eventually_successful'
                },
                'environmentConfigData': retry_config(0),
            }
        },
    )

    run_id = result.data['startPipelineExecution']['run']['runId']
    logs = result.data['startPipelineExecution']['run']['logs']['nodes']
    assert step_did_succeed(logs, 'spawn.compute')
    assert step_did_fail(logs, 'fail.compute')
    assert step_did_skip(logs, 'fail_2.compute')
    assert step_did_skip(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

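    # Each retry resumes from the failed step: already-successful steps are
    # skipped, and the pipeline advances one step further per attempt.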
    retry_one = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {
                    'name': 'eventually_successful'
                },
                'environmentConfigData': retry_config(1),
                'retryRunId': run_id,
            }
        },
    )

    run_id = retry_one.data['startPipelineExecution']['run']['runId']
    logs = retry_one.data['startPipelineExecution']['run']['logs']['nodes']
    assert step_did_not_run(logs, 'spawn.compute')
    assert step_did_succeed(logs, 'fail.compute')
    assert step_did_fail(logs, 'fail_2.compute')
    assert step_did_skip(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

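    # Second retry: 'fail_2' now succeeds and 'fail_3' becomes the new failure.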
    retry_two = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {
                    'name': 'eventually_successful'
                },
                'environmentConfigData': retry_config(2),
                'retryRunId': run_id,
            }
        },
    )

    run_id = retry_two.data['startPipelineExecution']['run']['runId']
    logs = retry_two.data['startPipelineExecution']['run']['logs']['nodes']

    assert step_did_not_run(logs, 'spawn.compute')
    assert step_did_not_run(logs, 'fail.compute')
    assert step_did_succeed(logs, 'fail_2.compute')
    assert step_did_fail(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

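    # Final retry: the remaining steps succeed and the pipeline completes.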
    retry_three = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': {
                    'name': 'eventually_successful'
                },
                'environmentConfigData': retry_config(3),
                'retryRunId': run_id,
            }
        },
    )

    run_id = retry_three.data['startPipelineExecution']['run']['runId']
    logs = retry_three.data['startPipelineExecution']['run']['logs']['nodes']

    assert step_did_not_run(logs, 'spawn.compute')
    assert step_did_not_run(logs, 'fail.compute')
    assert step_did_not_run(logs, 'fail_2.compute')
    assert step_did_succeed(logs, 'fail_3.compute')
    assert step_did_succeed(logs, 'reset.compute')
Example #30
def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {
                    'node_a': {
                        'config': {
                            'foo': 'baz'
                        }
                    },
                    'node_b': {
                        'config': {
                            'bar': 3
                        }
                    }
                }
            }
        }
    }

    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=composite_pipeline,
        selector=ExecutionSelector('nonce'),
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, composite_pipeline,
                                       pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

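    # Run the same pipeline again with the multiprocess executor, which
    # requires persistent (filesystem) storage rather than in-memory storage.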
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {
                    'node_a': {
                        'config': {
                            'foo': 'baz'
                        }
                    },
                    'node_b': {
                        'config': {
                            'bar': 3
                        }
                    }
                }
            }
        },
        'execution': {
            'multiprocess': {}
        },
        'storage': {
            'filesystem': {}
        },
    }

    pipeline_run = instance.create_run_for_pipeline(
        pipeline=composite_pipeline,
        selector=ExecutionSelector('nonce'),
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, composite_pipeline,
                                       pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS