def test_environment_dict_scheduler_error(self, graphql_context, snapshot):
    """Start 'environment_dict_error_schedule' and check its tick via GraphQL and the instance."""
    schedule_name = 'environment_dict_error_schedule'
    instance = graphql_context.instance
    repository = graphql_context.legacy_get_repository_definition()
    reconcile_scheduler_state("", "", repository, instance)

    start_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )
    assert_start_scheduled_execution_success(start_result)
    run_id = start_result.data['startScheduledExecution']['run']['runId']

    # Tick stats as reported by the GraphQL layer.
    ticks_result = execute_dagster_graphql_and_finish_runs(
        graphql_context, SCHEDULE_TICKS_QUERY
    )
    running_schedules = ticks_result.data['scheduler']['runningSchedules']
    schedule_result = next(
        entry
        for entry in running_schedules
        if entry['scheduleDefinition']['name'] == schedule_name
    )
    assert schedule_result['stats']['ticksSucceeded'] == 1
    snapshot.assert_match(schedule_result)

    # Tick as returned by the instance; it must reference the launched run.
    ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_name)
    assert len(ticks) == 1
    only_tick = ticks[0]
    assert only_tick.status == ScheduleTickStatus.SUCCESS
    assert only_tick.run_id == run_id
def test_partition_based_custom_selector(self, graphql_context):
    """Start 'partition_based_custom_selector' twice and check the run tags of each start."""
    schedule_name = 'partition_based_custom_selector'

    def has_tag(run_tags, key, value):
        """Return True when run_tags holds an entry with exactly this key and value."""
        return any(t['key'] == key and t['value'] == value for t in run_tags)

    first = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )
    assert not first.errors
    assert first.data
    assert_start_scheduled_execution_success(first)

    first_run = first.data['startScheduledExecution']['run']
    assert uuid.UUID(first_run['runId'])
    assert first_run['pipeline']['name'] == 'no_config_pipeline'

    first_tags = first_run['tags']
    assert has_tag(first_tags, 'dagster/schedule_name', 'partition_based_custom_selector')
    assert has_tag(first_tags, 'dagster/partition', '9')
    assert has_tag(first_tags, 'dagster/partition_set', 'scheduled_integer_partitions')

    second = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )
    second_tags = second.data['startScheduledExecution']['run']['tags']
    # The second start is expected to carry a different partition tag than the first.
    assert has_tag(second_tags, 'dagster/partition', '8')
def test_tick_skip(self, graphql_context, snapshot):
    """Start 'no_config_should_execute' and check that one SKIPPED tick is recorded."""
    schedule_name = 'no_config_should_execute'
    instance = graphql_context.instance
    repository = graphql_context.legacy_get_repository_definition()
    reconcile_scheduler_state("", "", repository, instance)

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )

    # Tick stats as reported by the GraphQL layer.
    ticks_result = execute_dagster_graphql_and_finish_runs(
        graphql_context, SCHEDULE_TICKS_QUERY
    )
    running_schedules = ticks_result.data['scheduler']['runningSchedules']
    schedule_result = next(
        entry
        for entry in running_schedules
        if entry['scheduleDefinition']['name'] == schedule_name
    )
    assert schedule_result['stats']['ticksSkipped'] == 1
    snapshot.assert_match(schedule_result)

    # Tick as returned by the instance.
    ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_name)
    assert len(ticks) == 1
    only_tick = ticks[0]
    assert only_tick.status == ScheduleTickStatus.SKIPPED
def test_should_execute_scheduler_error(self, graphql_context, snapshot):
    """Start 'should_execute_error_schedule' and check that a FAILURE tick with an error is recorded."""
    schedule_name = 'should_execute_error_schedule'
    instance = graphql_context.instance
    repository = graphql_context.legacy_get_repository_definition()
    reconcile_scheduler_state("", "", repository, instance)

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )

    # Tick stats as reported by the GraphQL layer.
    ticks_result = execute_dagster_graphql_and_finish_runs(
        graphql_context, SCHEDULE_TICKS_QUERY
    )
    running_schedules = ticks_result.data['scheduler']['runningSchedules']
    schedule_result = next(
        entry
        for entry in running_schedules
        if entry['scheduleDefinition']['name'] == schedule_name
    )
    assert schedule_result['stats']['ticksFailed'] == 1
    snapshot.assert_match(schedule_result)

    # Tick as returned by the instance, including the captured error message.
    ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_name)
    assert len(ticks) == 1
    only_tick = ticks[0]
    assert only_tick.status == ScheduleTickStatus.FAILURE
    assert only_tick.error
    expected_fragment = (
        "Error occurred during the execution should_execute for schedule "
        "should_execute_error_schedule"
    )
    assert expected_fragment in only_tick.error.message
def test_invalid_config_schedule_error(self, graphql_context, snapshot):
    """Start 'invalid_config_schedule' and check the response type and the recorded tick."""
    schedule_name = 'invalid_config_schedule'
    repository = graphql_context.legacy_get_repository_definition()
    instance = graphql_context.instance
    reconcile_scheduler_state("", "", repository, instance)

    start_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )
    response_type = start_result.data['startScheduledExecution']['__typename']
    assert response_type == 'PipelineConfigValidationInvalid'

    # Tick stats as reported by the GraphQL layer.
    ticks_result = execute_dagster_graphql_and_finish_runs(
        graphql_context, SCHEDULE_TICKS_QUERY
    )
    running_schedules = ticks_result.data['scheduler']['runningSchedules']
    schedule_result = next(
        entry
        for entry in running_schedules
        if entry['scheduleDefinition']['name'] == schedule_name
    )
    assert schedule_result['stats']['ticksSucceeded'] == 1
    snapshot.assert_match(schedule_result)

    # Tick as returned by the instance.
    ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_name)
    assert len(ticks) == 1
    only_tick = ticks[0]
    assert only_tick.status == ScheduleTickStatus.SUCCESS
def test_launch_partial_backfill(self, graphql_context):
    """Backfill partitions "2" and "3" in full, then again restricted to the 'after_failure' step."""
    repository_selector = infer_repository_selector(graphql_context)
    partition_set_selector = {
        "repositorySelector": repository_selector,
        "partitionSetName": "chained_integer_partition",
    }

    def launch_backfill(backfill_params):
        """Launch a backfill, assert it reports success with two runs, and return the run ids."""
        response = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={"backfillParams": backfill_params},
        )
        assert not response.errors
        assert response.data
        payload = response.data["launchPartitionBackfill"]
        assert payload["__typename"] == "PartitionBackfillSuccess"
        assert len(payload["launchedRunIds"]) == 2
        return response.data["launchPartitionBackfill"]["launchedRunIds"]

    def messages_for(run_id):
        """Return the log messages of a finished run."""
        run_logs = get_all_logs_for_finished_run_via_subscription(graphql_context, run_id)
        return run_logs["pipelineRunLogs"]["messages"]

    # Full execution: every step is expected to succeed.
    full_run_ids = launch_backfill(
        {
            "selector": partition_set_selector,
            "partitionNames": ["2", "3"],
        }
    )
    for run_id in full_run_ids:
        logs = messages_for(run_id)
        assert step_did_succeed(logs, "always_succeed")
        assert step_did_succeed(logs, "conditionally_fail")
        assert step_did_succeed(logs, "after_failure")

    # Partial re-execution: only the requested step should run.
    partial_steps = ["after_failure"]
    partial_run_ids = launch_backfill(
        {
            "selector": partition_set_selector,
            "partitionNames": ["2", "3"],
            "reexecutionSteps": partial_steps,
        }
    )
    for run_id in partial_run_ids:
        logs = messages_for(run_id)
        assert step_did_not_run(logs, "always_succeed")
        assert step_did_not_run(logs, "conditionally_fail")
        assert step_did_succeed(logs, "after_failure")
def test_launch_from_failure(self, graphql_context):
    """Backfill with TEST_SOLID_SHOULD_FAIL set, then re-launch the same partitions with fromFailure."""
    partition_set_selector = {
        "repositorySelector": infer_repository_selector(graphql_context),
        "partitionSetName": "chained_integer_partition",
    }

    def messages_for(run_id):
        """Return the log messages of a finished run."""
        run_logs = get_all_logs_for_finished_run_via_subscription(graphql_context, run_id)
        return run_logs["pipelineRunLogs"]["messages"]

    # First launch runs with the environment variable that the original comment
    # says triggers a failure in the conditionally_fail solid.
    with environ({"TEST_SOLID_SHOULD_FAIL": "YES"}):
        failing = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": partition_set_selector,
                    "partitionNames": ["2", "3"],
                }
            },
        )

    assert not failing.errors
    assert failing.data
    failing_payload = failing.data["launchPartitionBackfill"]
    assert failing_payload["__typename"] == "PartitionBackfillSuccess"
    assert len(failing_payload["launchedRunIds"]) == 2

    for run_id in failing.data["launchPartitionBackfill"]["launchedRunIds"]:
        logs = messages_for(run_id)
        assert step_did_succeed(logs, "always_succeed.compute")
        assert step_did_fail(logs, "conditionally_fail.compute")
        assert step_did_skip(logs, "after_failure.compute")

    # Second launch: from failure, with the environment variable no longer set.
    resumed = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PARTITION_BACKFILL_MUTATION,
        variables={
            "backfillParams": {
                "selector": partition_set_selector,
                "partitionNames": ["2", "3"],
                "fromFailure": True,
            }
        },
    )

    assert not resumed.errors
    assert resumed.data
    resumed_payload = resumed.data["launchPartitionBackfill"]
    assert resumed_payload["__typename"] == "PartitionBackfillSuccess"
    assert len(resumed_payload["launchedRunIds"]) == 2

    for run_id in resumed.data["launchPartitionBackfill"]["launchedRunIds"]:
        logs = messages_for(run_id)
        assert step_did_not_run(logs, "always_succeed.compute")
        assert step_did_succeed(logs, "conditionally_fail.compute")
        assert step_did_succeed(logs, "after_failure.compute")
def test_retry_resource_pipeline(self, graphql_context):
    """Run 'retry_resource_pipeline', then re-execute it with the resume/retry tag and check step outcomes."""
    selector = get_legacy_pipeline_selector(graphql_context, 'retry_resource_pipeline')

    def messages_for(target_run_id):
        """Return the log messages of a finished run."""
        run_logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, target_run_id
        )
        return run_logs['pipelineRunLogs']['messages']

    initial = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': selector,
                'runConfigData': {'storage': {'filesystem': {}}},
            }
        },
    )
    parent_run_id = initial.data['startPipelineExecution']['run']['runId']
    initial_logs = messages_for(parent_run_id)
    assert step_did_succeed(initial_logs, 'start.compute')
    assert step_did_fail(initial_logs, 'will_fail.compute')

    # Re-execute with the parent run as both root and parent.
    retry_one = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': selector,
                'runConfigData': {'storage': {'filesystem': {}}},
                'executionMetadata': {
                    'rootRunId': parent_run_id,
                    'parentRunId': parent_run_id,
                    'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
                },
            }
        },
    )
    retry_run_id = retry_one.data['startPipelineReexecution']['run']['runId']
    retry_logs = messages_for(retry_run_id)
    assert step_did_not_run(retry_logs, 'start.compute')
    assert step_did_fail(retry_logs, 'will_fail.compute')
def test_retry_resource_pipeline(self, graphql_context):
    """Launch 'retry_resource_pipeline', then re-launch it with the resume/retry tag and check step outcomes."""
    selector = infer_pipeline_selector(graphql_context, "retry_resource_pipeline")

    def messages_for(target_run_id):
        """Return the log messages of a finished run."""
        run_logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, target_run_id
        )
        return run_logs["pipelineRunLogs"]["messages"]

    initial = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {"storage": {"filesystem": {}}},
            }
        },
    )
    parent_run_id = initial.data["launchPipelineExecution"]["run"]["runId"]
    initial_logs = messages_for(parent_run_id)
    assert step_did_succeed(initial_logs, "start.compute")
    assert step_did_fail(initial_logs, "will_fail.compute")

    # Re-launch with the parent run as both root and parent.
    retry_one = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {"storage": {"filesystem": {}}},
                "executionMetadata": {
                    "rootRunId": parent_run_id,
                    "parentRunId": parent_run_id,
                    "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                },
            }
        },
    )
    retry_run_id = retry_one.data["launchPipelineReexecution"]["run"]["runId"]
    retry_logs = messages_for(retry_run_id)
    assert step_did_not_run(retry_logs, "start.compute")
    assert step_did_fail(retry_logs, "will_fail.compute")
def test_pipeline_reexecution_invalid_step_in_subset(self, graphql_context):
    """Re-launch 'csv_hello_world' with the unknown step key "nope" and check the returned PythonError."""
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    run_id = make_new_run_id()

    first_params = {
        "selector": selector,
        "runConfigData": csv_hello_world_solids_config(),
        "executionMetadata": {"runId": run_id},
        "mode": "default",
    }
    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={"executionParams": first_params},
    )

    # Retry against the first run, requesting a step key that does not exist.
    new_run_id = make_new_run_id()
    retry_params = {
        "selector": selector,
        "runConfigData": csv_hello_world_solids_config(),
        "stepKeys": ["nope"],
        "executionMetadata": {
            "runId": new_run_id,
            "rootRunId": run_id,
            "parentRunId": run_id,
            "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
        },
        "mode": "default",
    }
    result_two = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={"executionParams": retry_params},
    )

    query_result = result_two.data["launchPipelineReexecution"]
    assert query_result["__typename"] == "PythonError"
    assert query_result["className"] == "DagsterExecutionStepNotFoundError"
    error_message = query_result["message"]
    assert "Can not build subset plan from unknown step: nope" in error_message
def test_pipeline_reexecution_invalid_step_in_subset(self, graphql_context):
    """Re-execute 'csv_hello_world' with the unknown step key 'nope' and check the InvalidStepError."""
    selector = get_legacy_pipeline_selector(graphql_context, 'csv_hello_world')
    run_id = make_new_run_id()

    first_params = {
        'selector': selector,
        'runConfigData': csv_hello_world_solids_config(),
        'executionMetadata': {'runId': run_id},
        'mode': 'default',
    }
    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={'executionParams': first_params},
    )

    # Retry against the first run, requesting a step key that does not exist.
    new_run_id = make_new_run_id()
    retry_params = {
        'selector': selector,
        'runConfigData': csv_hello_world_solids_config(),
        'stepKeys': ['nope'],
        'executionMetadata': {
            'runId': new_run_id,
            'rootRunId': run_id,
            'parentRunId': run_id,
            'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
        },
        'mode': 'default',
    }
    result_two = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_PIPELINE_REEXECUTION_SNAPSHOT_QUERY,
        variables={'executionParams': retry_params},
    )

    query_result = result_two.data['startPipelineReexecution']
    assert query_result['__typename'] == 'InvalidStepError'
    assert query_result['invalidStepKey'] == 'nope'
def test_environment_dict_scheduler_error_serialize_cause(self, graphql_context):
    """Launch 'environment_dict_error_schedule' and check the instance's tick references the run."""
    schedule_name = 'environment_dict_error_schedule'
    instance = graphql_context.instance
    repository = graphql_context.legacy_get_repository_definition()
    instance.reconcile_scheduler_state(
        repository=repository,
        python_path='/path/to/python',
        repository_path='/path/to/repository',
    )

    launch_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_name},
    )
    assert_launch_scheduled_execution_success(launch_result)
    run_id = launch_result.data['launchScheduledExecution']['run']['runId']

    ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_name)
    assert len(ticks) == 1
    only_tick = ticks[0]
    assert only_tick.status == ScheduleTickStatus.SUCCESS
    assert only_tick.run_id == run_id
def test_basic_start_scheduled_execution_with_environment_dict_fn(self, graphql_context):
    """Start 'no_config_pipeline_hourly_schedule_with_config_fn' and check the resulting run."""
    start_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'no_config_pipeline_hourly_schedule_with_config_fn'},
    )
    assert not start_result.errors
    assert start_result.data

    # Existence checks only: a parseable run id, the pipeline name, and the schedule tag.
    assert_start_scheduled_execution_success(start_result)
    run = start_result.data['startScheduledExecution']['run']
    assert uuid.UUID(run['runId'])
    assert run['pipeline']['name'] == 'no_config_pipeline'

    run_tags = start_result.data['startScheduledExecution']['run']['tags']
    assert any(
        entry['key'] == 'dagster/schedule_name'
        and entry['value'] == 'no_config_pipeline_hourly_schedule_with_config_fn'
        for entry in run_tags
    )
def test_retry_hard_failure(self, graphql_context):
    """Launch 'hard_failer' with fail=True, then re-launch with fail=False and check step outcomes."""
    selector = infer_pipeline_selector(graphql_context, "hard_failer")

    def messages_for(target_run_id):
        """Return the log messages of a finished run."""
        run_logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, target_run_id
        )
        return run_logs["pipelineRunLogs"]["messages"]

    initial = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {
                    "solids": {"hard_fail_or_0": {"config": {"fail": True}}}
                },
            }
        },
    )
    parent_run_id = initial.data["launchPipelineExecution"]["run"]["runId"]
    initial_logs = messages_for(parent_run_id)

    # The first solid starts, yet both it and its successor satisfy step_did_not_run.
    assert step_started(initial_logs, "hard_fail_or_0")
    assert step_did_not_run(initial_logs, "hard_fail_or_0")
    assert step_did_not_run(initial_logs, "increment")

    retry = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {
                    "solids": {"hard_fail_or_0": {"config": {"fail": False}}}
                },
                "executionMetadata": {
                    "rootRunId": parent_run_id,
                    "parentRunId": parent_run_id,
                    "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                },
            }
        },
    )
    retry_run_id = retry.data["launchPipelineReexecution"]["run"]["runId"]
    retry_logs = messages_for(retry_run_id)
    assert step_did_succeed(retry_logs, "hard_fail_or_0")
    assert step_did_succeed(retry_logs, "increment")
def test_tick_success(self, graphql_context, snapshot):
    """Launch 'no_config_pipeline_hourly_schedule' and check its tick via GraphQL and the instance."""
    instance = graphql_context.instance
    repository = graphql_context.legacy_get_repository_definition()
    instance.reconcile_scheduler_state(
        repository=repository,
        python_path='/path/to/python',
        repository_path='/path/to/repository',
    )
    schedule_def = repository.get_schedule_def("no_config_pipeline_hourly_schedule")

    # Timestamp taken before the launch; the tick must fall after it.
    start_time = time.time()
    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': schedule_def.name},
    )

    # Tick stats as reported by the GraphQL layer.
    ticks_result = execute_dagster_graphql_and_finish_runs(
        graphql_context, SCHEDULE_TICKS_QUERY
    )
    assert ticks_result.data
    running_schedules = ticks_result.data['scheduler']['runningSchedules']
    schedule_result = next(
        entry
        for entry in running_schedules
        if entry['scheduleDefinition']['name'] == schedule_def.name
    )
    assert schedule_result
    assert schedule_result['stats']['ticksSucceeded'] == 1
    snapshot.assert_match(schedule_result)

    # Tick as returned by the instance.
    ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_def.name)
    assert len(ticks) == 1
    only_tick = ticks[0]
    assert only_tick.schedule_name == schedule_def.name
    assert only_tick.cron_schedule == schedule_def.cron_schedule
    assert only_tick.timestamp > start_time
    assert only_tick.timestamp < time.time()
    assert only_tick.status == ScheduleTickStatus.SUCCESS
    assert only_tick.run_id
def test_partition_based_decorator(self, graphql_context):
    """Start the 'partition_based_decorator' schedule and check that the start succeeds."""
    query_variables = {'scheduleName': 'partition_based_decorator'}
    start_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables=query_variables,
    )

    assert not start_result.errors
    assert start_result.data
    assert_start_scheduled_execution_success(start_result)
def test_query_multiple_schedule_ticks(self, graphql_context, snapshot):
    """Start three schedules in turn and snapshot the running-schedules tick data."""
    instance = graphql_context.instance
    repository = graphql_context.legacy_get_repository_definition()
    reconcile_scheduler_state("", "", repository, instance)

    schedule_names = (
        'no_config_pipeline_hourly_schedule',
        'no_config_should_execute',
        'environment_dict_error_schedule',
    )
    for schedule_name in schedule_names:
        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': schedule_name},
        )

    ticks_result = execute_dagster_graphql_and_finish_runs(
        graphql_context, SCHEDULE_TICKS_QUERY
    )
    running_schedules = ticks_result.data['scheduler']['runningSchedules']
    snapshot.assert_match(running_schedules)
def test_launch_scheduled_execution_with_should_execute(self, graphql_context):
    """Launch 'no_config_should_execute' and expect a ScheduledExecutionBlocked response."""
    query_variables = {'scheduleName': 'no_config_should_execute'}
    launch_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables=query_variables,
    )

    assert not launch_result.errors
    assert launch_result.data
    response_type = launch_result.data['launchScheduledExecution']['__typename']
    assert response_type == 'ScheduledExecutionBlocked'
def test_partition_based_execution(self, graphql_context):
    """Launch the 'partition_based' schedule twice and check the run tags of each launch."""

    def has_tag(run_tags, key, value):
        """Return True when run_tags holds an entry with exactly this key and value."""
        return any(t['key'] == key and t['value'] == value for t in run_tags)

    first = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'partition_based'},
    )
    assert not first.errors
    assert first.data

    # Existence checks: a parseable run id, the pipeline name, and the expected tags.
    assert_launch_scheduled_execution_success(first)
    first_run = first.data['launchScheduledExecution']['run']
    assert uuid.UUID(first_run['runId'])
    assert first_run['pipeline']['name'] == 'no_config_pipeline'

    first_tags = first_run['tags']
    assert has_tag(first_tags, 'dagster/schedule_name', 'partition_based')
    assert has_tag(first_tags, 'dagster/partition', '9')
    assert has_tag(first_tags, 'dagster/partition_set', 'scheduled_integer_partitions')

    second = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'partition_based'},
    )
    second_tags = second.data['launchScheduledExecution']['run']['tags']
    # The second launch is expected to carry the same partition tag as the first.
    assert has_tag(second_tags, 'dagster/partition', '9')
def test_tagged_pipeline_schedule(self, graphql_context):
    """Start 'tagged_pipeline_schedule' and check the run's pipeline name and its foo=bar tag."""
    start_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'tagged_pipeline_schedule'},
    )
    assert not start_result.errors
    assert_start_scheduled_execution_success(start_result)

    pipeline_name = start_result.data['startScheduledExecution']['run']['pipeline']['name']
    assert pipeline_name == 'tagged_pipeline'

    run_tags = start_result.data['startScheduledExecution']['run']['tags']
    assert any(entry['key'] == 'foo' and entry['value'] == 'bar' for entry in run_tags)
def test_pipeline_reexecution_info_query(self, graphql_context, snapshot):
    """Run 'csv_hello_world', re-execute one step, and compare stepKeysToExecute for both runs."""
    selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
    run_id = make_new_run_id()

    def fetch_run_info(target_run_id):
        """Query the re-execution info for a run and return the pipelineRunOrError payload."""
        response = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            PIPELINE_REEXECUTION_INFO_QUERY,
            variables={'runId': target_run_id},
        )
        return response.data['pipelineRunOrError']

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_QUERY,
        variables={
            'executionParams': {
                'selector': selector,
                'runConfigData': csv_hello_world_solids_config_fs_storage(),
                'executionMetadata': {'runId': run_id},
                'mode': 'default',
            }
        },
    )

    # Retry, restricted to a single step key.
    new_run_id = make_new_run_id()
    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': selector,
                'runConfigData': csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {
                    'runId': new_run_id,
                    'rootRunId': run_id,
                    'parentRunId': run_id,
                    'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
                },
                'mode': 'default',
            }
        },
    )

    # The first run reports no explicit step selection.
    original_info = fetch_run_info(run_id)
    assert original_info['__typename'] == 'PipelineRun'
    assert original_info['stepKeysToExecute'] is None

    # The re-execution reports a selection, which is snapshotted.
    retry_info = fetch_run_info(new_run_id)
    assert retry_info['__typename'] == 'PipelineRun'
    step_keys = retry_info['stepKeysToExecute']
    assert step_keys is not None
    snapshot.assert_match(step_keys)
def test_pipeline_reexecution_info_query(self, graphql_context, snapshot):
    """Launch 'csv_hello_world', re-launch one step, and compare stepKeysToExecute for both runs."""
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    run_id = make_new_run_id()

    def fetch_run_info(target_run_id):
        """Query the re-execution info for a run and return the pipelineRunOrError payload."""
        response = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            PIPELINE_REEXECUTION_INFO_QUERY,
            variables={"runId": target_run_id},
        )
        return response.data["pipelineRunOrError"]

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "selector": selector,
                "runConfigData": csv_hello_world_solids_config(),
                "executionMetadata": {"runId": run_id},
                "mode": "default",
            }
        },
    )

    # Retry, restricted to a single step key.
    new_run_id = make_new_run_id()
    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "selector": selector,
                "runConfigData": csv_hello_world_solids_config(),
                "stepKeys": ["sum_sq_solid"],
                "executionMetadata": {
                    "runId": new_run_id,
                    "rootRunId": run_id,
                    "parentRunId": run_id,
                    "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                },
                "mode": "default",
            }
        },
    )

    # The first run reports no explicit step selection.
    original_info = fetch_run_info(run_id)
    assert original_info["__typename"] == "Run"
    assert original_info["stepKeysToExecute"] is None

    # The re-execution reports a selection, which is snapshotted.
    retry_info = fetch_run_info(new_run_id)
    assert retry_info["__typename"] == "Run"
    step_keys = retry_info["stepKeysToExecute"]
    assert step_keys is not None
    snapshot.assert_match(step_keys)
def _do_retry_intermediates_test(graphql_context, run_id, reexecution_run_id):
    """Run 'eventually_successful' under run_id, check its step outcomes, then re-execute it.

    Returns the result of the re-execution query, which is issued under
    reexecution_run_id with run_id as both root and parent run.
    """
    selector = get_legacy_pipeline_selector(graphql_context, 'eventually_successful')

    first_params = {
        'mode': 'default',
        'selector': selector,
        'executionMetadata': {'runId': run_id},
    }
    logs = sync_execute_get_events(
        context=graphql_context,
        variables={'executionParams': first_params},
    )

    # Expected outcomes of the first run: one success, one failure, three skips.
    assert step_did_succeed(logs, 'spawn.compute')
    assert step_did_fail(logs, 'fail.compute')
    assert step_did_skip(logs, 'fail_2.compute')
    assert step_did_skip(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

    retry_params = {
        'mode': 'default',
        'selector': selector,
        'executionMetadata': {
            'runId': reexecution_run_id,
            'rootRunId': run_id,
            'parentRunId': run_id,
            'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
        },
    }
    return execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={'executionParams': retry_params},
    )
def _do_retry_intermediates_test(graphql_context, run_id, reexecution_run_id):
    """Run 'eventually_successful' under run_id, check its step outcomes, then re-launch it.

    Returns the result of the re-execution mutation, which is issued under
    reexecution_run_id with run_id as both root and parent run.
    """
    selector = infer_pipeline_selector(graphql_context, "eventually_successful")

    first_params = {
        "mode": "default",
        "selector": selector,
        "executionMetadata": {"runId": run_id},
    }
    logs = sync_execute_get_events(
        context=graphql_context,
        variables={"executionParams": first_params},
    )

    # Expected outcomes of the first run: one success, one failure, three skips.
    assert step_did_succeed(logs, "spawn.compute")
    assert step_did_fail(logs, "fail.compute")
    assert step_did_skip(logs, "fail_2.compute")
    assert step_did_skip(logs, "fail_3.compute")
    assert step_did_skip(logs, "reset.compute")

    retry_params = {
        "mode": "default",
        "selector": selector,
        "executionMetadata": {
            "runId": reexecution_run_id,
            "rootRunId": run_id,
            "parentRunId": run_id,
            "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
        },
    }
    return execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={"executionParams": retry_params},
    )
def test_partition_based_multi_mode_decorator(self, graphql_context):
    """Start 'partition_based_multi_mode_decorator' and check which steps emitted a start event."""
    start_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'partition_based_multi_mode_decorator'},
    )
    assert not start_result.errors
    assert start_result.data
    assert_start_scheduled_execution_success(start_result)

    run_id = start_result.data['startScheduledExecution']['run']['runId']
    run_logs = get_all_logs_for_finished_run_via_subscription(graphql_context, run_id)
    messages = run_logs['pipelineRunLogs']['messages']

    # Collect the step key of every step-start event, in log order.
    started_step_keys = []
    for message in messages:
        if message['__typename'] == 'ExecutionStepStartEvent':
            started_step_keys.append(message['step']['key'])

    assert started_step_keys == ['return_six.compute']
def test_get_partition_runs(self, graphql_context):
    """Backfill partitions "2" and "3" of 'integer_partition' and check the per-partition run lists."""
    repository_selector = infer_repository_selector(graphql_context)

    backfill_params = {
        "selector": {
            "repositorySelector": repository_selector,
            "partitionSetName": "integer_partition",
        },
        "partitionNames": ["2", "3"],
        "forceSynchronousSubmission": True,
    }
    backfill = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PARTITION_BACKFILL_MUTATION,
        variables={"backfillParams": backfill_params},
    )
    assert not backfill.errors
    backfill_payload = backfill.data["launchPartitionBackfill"]
    assert backfill_payload["__typename"] == "PartitionBackfillSuccess"
    assert len(backfill_payload["launchedRunIds"]) == 2
    run_ids = backfill.data["launchPartitionBackfill"]["launchedRunIds"]

    runs_result = execute_dagster_graphql(
        graphql_context,
        query=GET_PARTITION_SET_RUNS_QUERY,
        variables={
            "partitionSetName": "integer_partition",
            "repositorySelector": repository_selector,
        },
    )
    assert not runs_result.errors
    assert runs_result.data
    partitions = runs_result.data["partitionSetOrError"]["partitionsOrError"]["results"]
    assert len(partitions) == 10

    # Only the two backfilled partitions may have a run, and it must be a launched one.
    for partition in partitions:
        if partition["name"] in ("2", "3"):
            assert len(partition["runs"]) == 1
            assert partition["runs"][0]["runId"] in run_ids
        else:
            assert len(partition["runs"]) == 0
def test_launch_partial_backfill(self, graphql_context):
    """Launch a backfill restricted to 'after_failure' and check its persisted progress record."""
    partition_set_selector = {
        "repositorySelector": infer_repository_selector(graphql_context),
        "partitionSetName": "chained_integer_partition",
    }

    # Launch the backfill for two partitions, limited to a single step.
    partial_steps = ["after_failure"]
    launch_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PARTITION_BACKFILL_MUTATION,
        variables={
            "backfillParams": {
                "selector": partition_set_selector,
                "partitionNames": ["2", "3"],
                "reexecutionSteps": partial_steps,
            }
        },
    )
    assert not launch_result.errors
    assert launch_result.data
    launch_type = launch_result.data["launchPartitionBackfill"]["__typename"]
    assert launch_type == "PartitionBackfillSuccess"
    backfill_id = launch_result.data["launchPartitionBackfill"]["backfillId"]

    # Query the progress record of the backfill that was just launched.
    progress_result = execute_dagster_graphql(
        graphql_context,
        PARTITION_PROGRESS_QUERY,
        variables={"backfillId": backfill_id},
    )
    assert not progress_result.errors
    assert progress_result.data
    assert progress_result.data["partitionBackfillOrError"]["__typename"] == "PartitionBackfill"
    assert progress_result.data["partitionBackfillOrError"]["status"] == "REQUESTED"
    assert progress_result.data["partitionBackfillOrError"]["isPersisted"]
    assert progress_result.data["partitionBackfillOrError"]["numRequested"] == 0
    assert progress_result.data["partitionBackfillOrError"]["numTotal"] == 2
    recorded_steps = progress_result.data["partitionBackfillOrError"]["reexecutionSteps"]
    assert recorded_steps == ["after_failure"]
def test_tagged_pipeline_scheduled_execution_with_run_launcher(self, graphql_context):
    """Launch 'tagged_pipeline_schedule' and check the run id, pipeline name, and foo=bar tag."""
    launch_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'tagged_pipeline_schedule'},
    )
    assert not launch_result.errors
    assert launch_result.data

    # Existence checks only.
    assert_launch_scheduled_execution_success(launch_result)
    run = launch_result.data['launchScheduledExecution']['run']
    assert uuid.UUID(run['runId'])
    assert run['pipeline']['name'] == 'tagged_pipeline'

    run_tags = launch_result.data['launchScheduledExecution']['run']['tags']
    assert any(entry['key'] == 'foo' and entry['value'] == 'bar' for entry in run_tags)
def test_just_basic_launch_scheduled_execution(self, graphql_context):
    """Launch 'no_config_pipeline_hourly_schedule' and check the run id, pipeline name, and schedule tag."""
    launch_result = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_SCHEDULED_EXECUTION_QUERY,
        variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
    )
    assert not launch_result.errors
    assert launch_result.data
    assert_launch_scheduled_execution_success(launch_result)

    run = launch_result.data['launchScheduledExecution']['run']
    assert uuid.UUID(run['runId'])
    assert run['pipeline']['name'] == 'no_config_pipeline'

    run_tags = launch_result.data['launchScheduledExecution']['run']['tags']
    assert any(
        entry['key'] == 'dagster/schedule_name'
        and entry['value'] == 'no_config_pipeline_hourly_schedule'
        for entry in run_tags
    )
def test_retry_pipeline_execution(self, graphql_context):
    """Launch 'eventually_successful' and re-launch it three times, checking step outcomes each time.

    Each re-launch passes the previous run's id as both root and parent run
    and uses retry_config(1), retry_config(2), retry_config(3) in turn.
    """
    selector = infer_pipeline_selector(graphql_context, "eventually_successful")

    def messages_for(target_run_id):
        """Return the log messages of a finished run."""
        run_logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, target_run_id
        )
        return run_logs["pipelineRunLogs"]["messages"]

    def relaunch(previous_run_id, attempt):
        """Re-launch the pipeline against previous_run_id and return the new run id."""
        response = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": retry_config(attempt),
                    "executionMetadata": {
                        "rootRunId": previous_run_id,
                        "parentRunId": previous_run_id,
                        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                    },
                }
            },
        )
        return response.data["launchPipelineReexecution"]["run"]["runId"]

    # Initial launch.
    initial = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": retry_config(0),
            }
        },
    )
    run_id = initial.data["launchPipelineExecution"]["run"]["runId"]
    logs = messages_for(run_id)
    assert step_did_succeed(logs, "spawn.compute")
    assert step_did_fail(logs, "fail.compute")
    assert step_did_skip(logs, "fail_2.compute")
    assert step_did_skip(logs, "fail_3.compute")
    assert step_did_skip(logs, "reset.compute")

    # First retry.
    run_id = relaunch(run_id, 1)
    logs = messages_for(run_id)
    assert step_did_not_run(logs, "spawn.compute")
    assert step_did_succeed(logs, "fail.compute")
    assert step_did_fail(logs, "fail_2.compute")
    assert step_did_skip(logs, "fail_3.compute")
    assert step_did_skip(logs, "reset.compute")

    # Second retry.
    run_id = relaunch(run_id, 2)
    logs = messages_for(run_id)
    assert step_did_not_run(logs, "spawn.compute")
    assert step_did_not_run(logs, "fail.compute")
    assert step_did_succeed(logs, "fail_2.compute")
    assert step_did_fail(logs, "fail_3.compute")
    assert step_did_skip(logs, "reset.compute")

    # Third retry.
    run_id = relaunch(run_id, 3)
    logs = messages_for(run_id)
    assert step_did_not_run(logs, "spawn.compute")
    assert step_did_not_run(logs, "fail.compute")
    assert step_did_not_run(logs, "fail_2.compute")
    assert step_did_succeed(logs, "fail_3.compute")
    assert step_did_succeed(logs, "reset.compute")