Example #1
    def test_environment_dict_scheduler_error(self, graphql_context, snapshot):
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'environment_dict_error_schedule'},
        )
        assert_start_scheduled_execution_success(result)
        run_id = result.data['startScheduledExecution']['run']['runId']

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            x for x in result.data['scheduler']['runningSchedules']
            if x['scheduleDefinition']['name'] ==
            'environment_dict_error_schedule')
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'environment_dict_error_schedule')

        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id == run_id
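
These tick tests read only the schedule name and tick stats from the response, so SCHEDULE_TICKS_QUERY presumably has roughly the following shape. This is a sketch inferred from the fields the assertions touch, not the exact query from the test suite (the inline fragment name is an assumption):

SCHEDULE_TICKS_QUERY = """
{
  scheduler {
    ... on Scheduler {
      runningSchedules {
        scheduleDefinition {
          name
        }
        stats {
          ticksSucceeded
          ticksSkipped
          ticksFailed
        }
      }
    }
  }
}
"""
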
Example #2
    def test_partition_based_custom_selector(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'partition_based_custom_selector'},
        )

        assert not result.errors
        assert result.data
        assert_start_scheduled_execution_success(result)
        assert uuid.UUID(
            result.data['startScheduledExecution']['run']['runId'])
        assert (result.data['startScheduledExecution']['run']['pipeline']
                ['name'] == 'no_config_pipeline')
        tags = result.data['startScheduledExecution']['run']['tags']
        assert any(tag['key'] == 'dagster/schedule_name'
                   and tag['value'] == 'partition_based_custom_selector'
                   for tag in tags)
        assert any(tag['key'] == 'dagster/partition' and tag['value'] == '9'
                   for tag in tags)
        assert any(tag['key'] == 'dagster/partition_set'
                   and tag['value'] == 'scheduled_integer_partitions'
                   for tag in tags)

        result_two = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'partition_based_custom_selector'},
        )
        tags = result_two.data['startScheduledExecution']['run']['tags']
        # a different partition is selected because the previous run is now in run storage

        assert any(tag['key'] == 'dagster/partition' and tag['value'] == '8'
                   for tag in tags)
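
The tag assertions above repeat the same any(...) scan over the run's tag list; a small hypothetical helper (not part of the test module) condenses the pattern:

def has_tag(tags, key, value):
    # tags is the list of {'key': ..., 'value': ...} dicts returned for the run
    return any(tag['key'] == key and tag['value'] == value for tag in tags)

# usage: assert has_tag(tags, 'dagster/partition', '9')
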
Example #3
    def test_tick_skip(self, graphql_context, snapshot):
        instance = graphql_context.instance

        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_should_execute'},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            x for x in result.data['scheduler']['runningSchedules']
            if x['scheduleDefinition']['name'] == 'no_config_should_execute')
        assert schedule_result['stats']['ticksSkipped'] == 1
        snapshot.assert_match(schedule_result)

        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'no_config_should_execute')

        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SKIPPED
Example #4
    def test_should_execute_scheduler_error(self, graphql_context, snapshot):
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'should_execute_error_schedule'},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            x for x in result.data['scheduler']['runningSchedules'] if
            x['scheduleDefinition']['name'] == 'should_execute_error_schedule')
        assert schedule_result['stats']['ticksFailed'] == 1
        snapshot.assert_match(schedule_result)

        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'should_execute_error_schedule')

        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.FAILURE
        assert tick.error
        assert (
            "Error occurred during the execution should_execute for schedule "
            "should_execute_error_schedule" in tick.error.message)
Example #5
    def test_invalid_config_schedule_error(self, graphql_context, snapshot):
        repository = graphql_context.legacy_get_repository_definition()
        instance = graphql_context.instance
        reconcile_scheduler_state("", "", repository, instance)
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'invalid_config_schedule'},
        )

        assert (result.data['startScheduledExecution']['__typename'] ==
                'PipelineConfigValidationInvalid')

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        schedule_result = next(
            x for x in result.data['scheduler']['runningSchedules']
            if x['scheduleDefinition']['name'] == 'invalid_config_schedule')
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'invalid_config_schedule')

        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SUCCESS
Example #6
    def test_launch_partial_backfill(self, graphql_context):
        # execute a full pipeline, without the failure environment variable
        repository_selector = infer_repository_selector(graphql_context)
        partition_set_selector = {
            "repositorySelector": repository_selector,
            "partitionSetName": "chained_integer_partition",
        }

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": partition_set_selector,
                    "partitionNames": ["2", "3"],
                }
            },
        )
        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 2
        for run_id in result.data["launchPartitionBackfill"]["launchedRunIds"]:
            logs = get_all_logs_for_finished_run_via_subscription(
                graphql_context, run_id)["pipelineRunLogs"]["messages"]
            assert step_did_succeed(logs, "always_succeed")
            assert step_did_succeed(logs, "conditionally_fail")
            assert step_did_succeed(logs, "after_failure")

        # reexecute a partial pipeline
        partial_steps = ["after_failure"]
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": partition_set_selector,
                    "partitionNames": ["2", "3"],
                    "reexecutionSteps": partial_steps,
                }
            },
        )
        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 2
        for run_id in result.data["launchPartitionBackfill"]["launchedRunIds"]:
            logs = get_all_logs_for_finished_run_via_subscription(
                graphql_context, run_id)["pipelineRunLogs"]["messages"]
            assert step_did_not_run(logs, "always_succeed")
            assert step_did_not_run(logs, "conditionally_fail")
            assert step_did_succeed(logs, "after_failure")
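
From the backfillParams passed in and the fields read back (__typename and launchedRunIds here, plus backfillId in a later example), LAUNCH_PARTITION_BACKFILL_MUTATION plausibly looks like the following sketch; the input and result type names are assumptions:

LAUNCH_PARTITION_BACKFILL_MUTATION = """
mutation($backfillParams: PartitionBackfillParams!) {
  launchPartitionBackfill(backfillParams: $backfillParams) {
    __typename
    ... on PartitionBackfillSuccess {
      backfillId
      launchedRunIds
    }
    ... on PythonError {
      message
    }
  }
}
"""
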
Example #7
    def test_launch_from_failure(self, graphql_context):
        repository_selector = infer_repository_selector(graphql_context)
        partition_set_selector = {
            "repositorySelector": repository_selector,
            "partitionSetName": "chained_integer_partition",
        }

        # trigger failure in the conditionally_fail solid
        with environ({"TEST_SOLID_SHOULD_FAIL": "YES"}):
            result = execute_dagster_graphql_and_finish_runs(
                graphql_context,
                LAUNCH_PARTITION_BACKFILL_MUTATION,
                variables={
                    "backfillParams": {
                        "selector": partition_set_selector,
                        "partitionNames": ["2", "3"],
                    }
                },
            )
        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 2
        for run_id in result.data["launchPartitionBackfill"]["launchedRunIds"]:
            logs = get_all_logs_for_finished_run_via_subscription(
                graphql_context, run_id)["pipelineRunLogs"]["messages"]
            assert step_did_succeed(logs, "always_succeed.compute")
            assert step_did_fail(logs, "conditionally_fail.compute")
            assert step_did_skip(logs, "after_failure.compute")

        # re-execute from failure (without the failure environment variable)
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": partition_set_selector,
                    "partitionNames": ["2", "3"],
                    "fromFailure": True,
                }
            },
        )
        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 2
        for run_id in result.data["launchPartitionBackfill"]["launchedRunIds"]:
            logs = get_all_logs_for_finished_run_via_subscription(
                graphql_context, run_id)["pipelineRunLogs"]["messages"]
            assert step_did_not_run(logs, "always_succeed.compute")
            assert step_did_succeed(logs, "conditionally_fail.compute")
            assert step_did_succeed(logs, "after_failure.compute")
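
The environ() context manager used above temporarily sets environment variables for the duration of the with block. A minimal stand-in, assuming the real helper in dagster's test utilities behaves like a patch-and-restore over os.environ:

import os
from contextlib import contextmanager

@contextmanager
def environ(env):
    previous = {key: os.environ.get(key) for key in env}
    os.environ.update(env)
    try:
        yield
    finally:
        # restore original values, dropping keys that did not exist before
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value
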
Example #8
    def test_retry_resource_pipeline(self, graphql_context):
        context = graphql_context
        selector = get_legacy_pipeline_selector(graphql_context,
                                                'retry_resource_pipeline')
        result = execute_dagster_graphql_and_finish_runs(
            context,
            START_PIPELINE_EXECUTION_QUERY,
            variables={
                'executionParams': {
                    'mode': 'default',
                    'selector': selector,
                    'runConfigData': {
                        'storage': {
                            'filesystem': {}
                        }
                    },
                }
            },
        )

        run_id = result.data['startPipelineExecution']['run']['runId']
        logs = get_all_logs_for_finished_run_via_subscription(
            context, run_id)['pipelineRunLogs']['messages']
        assert step_did_succeed(logs, 'start.compute')
        assert step_did_fail(logs, 'will_fail.compute')

        retry_one = execute_dagster_graphql_and_finish_runs(
            context,
            START_PIPELINE_REEXECUTION_QUERY,
            variables={
                'executionParams': {
                    'mode': 'default',
                    'selector': selector,
                    'runConfigData': {
                        'storage': {
                            'filesystem': {}
                        }
                    },
                    'executionMetadata': {
                        'rootRunId': run_id,
                        'parentRunId': run_id,
                        'tags': [{
                            'key': RESUME_RETRY_TAG,
                            'value': 'true'
                        }],
                    },
                }
            },
        )
        run_id = retry_one.data['startPipelineReexecution']['run']['runId']
        logs = get_all_logs_for_finished_run_via_subscription(
            context, run_id)['pipelineRunLogs']['messages']
        assert step_did_not_run(logs, 'start.compute')
        assert step_did_fail(logs, 'will_fail.compute')
Example #9
    def test_retry_resource_pipeline(self, graphql_context):
        context = graphql_context
        selector = infer_pipeline_selector(graphql_context,
                                           "retry_resource_pipeline")
        result = execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": {
                        "storage": {
                            "filesystem": {}
                        }
                    },
                }
            },
        )

        run_id = result.data["launchPipelineExecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(
            context, run_id)["pipelineRunLogs"]["messages"]
        assert step_did_succeed(logs, "start.compute")
        assert step_did_fail(logs, "will_fail.compute")

        retry_one = execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": {
                        "storage": {
                            "filesystem": {}
                        }
                    },
                    "executionMetadata": {
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{
                            "key": RESUME_RETRY_TAG,
                            "value": "true"
                        }],
                    },
                }
            },
        )
        run_id = retry_one.data["launchPipelineReexecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(
            context, run_id)["pipelineRunLogs"]["messages"]
        assert step_did_not_run(logs, "start.compute")
        assert step_did_fail(logs, "will_fail.compute")
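
The step_* helpers scan the subscription messages for step events keyed by step key. Hypothetical minimal versions, assuming the dagster-graphql event type names (ExecutionStepStartEvent, ExecutionStepSuccessEvent, ExecutionStepFailureEvent, ExecutionStepSkippedEvent) and the "did not run" semantics implied by test_retry_hard_failure below, where a step can start without reaching a terminal event:

def _step_event_types(logs, step_key):
    return {
        log["__typename"]
        for log in logs
        if log.get("step") and log["step"]["key"] == step_key
    }

def step_started(logs, step_key):
    return "ExecutionStepStartEvent" in _step_event_types(logs, step_key)

def step_did_succeed(logs, step_key):
    return "ExecutionStepSuccessEvent" in _step_event_types(logs, step_key)

def step_did_fail(logs, step_key):
    return "ExecutionStepFailureEvent" in _step_event_types(logs, step_key)

def step_did_skip(logs, step_key):
    return "ExecutionStepSkippedEvent" in _step_event_types(logs, step_key)

def step_did_not_run(logs, step_key):
    # no terminal event for the step: neither success, failure, nor skip
    return not (_step_event_types(logs, step_key) & {
        "ExecutionStepSuccessEvent",
        "ExecutionStepFailureEvent",
        "ExecutionStepSkippedEvent",
    })
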
Example #10
    def test_pipeline_reexecution_invalid_step_in_subset(
            self, graphql_context):
        run_id = make_new_run_id()
        selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": csv_hello_world_solids_config(),
                    "executionMetadata": {
                        "runId": run_id
                    },
                    "mode": "default",
                }
            },
        )

        # retry
        new_run_id = make_new_run_id()

        result_two = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": csv_hello_world_solids_config(),
                    "stepKeys": ["nope"],
                    "executionMetadata": {
                        "runId": new_run_id,
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{
                            "key": RESUME_RETRY_TAG,
                            "value": "true"
                        }],
                    },
                    "mode": "default",
                }
            },
        )

        query_result = result_two.data["launchPipelineReexecution"]
        assert query_result["__typename"] == "PythonError"
        assert query_result["className"] == "DagsterExecutionStepNotFoundError"
        assert "Can not build subset plan from unknown step: nope" in query_result[
            "message"]
Example #11
    def test_pipeline_reexecution_invalid_step_in_subset(
            self, graphql_context):
        run_id = make_new_run_id()
        selector = get_legacy_pipeline_selector(graphql_context,
                                                'csv_hello_world')
        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'runConfigData': csv_hello_world_solids_config(),
                    'executionMetadata': {
                        'runId': run_id
                    },
                    'mode': 'default',
                }
            },
        )

        # retry
        new_run_id = make_new_run_id()

        result_two = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_PIPELINE_REEXECUTION_SNAPSHOT_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'runConfigData': csv_hello_world_solids_config(),
                    'stepKeys': ['nope'],
                    'executionMetadata': {
                        'runId': new_run_id,
                        'rootRunId': run_id,
                        'parentRunId': run_id,
                        'tags': [{
                            'key': RESUME_RETRY_TAG,
                            'value': 'true'
                        }],
                    },
                    'mode': 'default',
                }
            },
        )

        query_result = result_two.data['startPipelineReexecution']
        assert query_result['__typename'] == 'InvalidStepError'
        assert query_result['invalidStepKey'] == 'nope'
Example #12
    def test_environment_dict_scheduler_error_serialize_cause(self, graphql_context):
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        instance.reconcile_scheduler_state(
            repository=repository,
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'environment_dict_error_schedule'},
        )
        assert_launch_scheduled_execution_success(result)
        run_id = result.data['launchScheduledExecution']['run']['runId']

        ticks = instance.get_schedule_ticks_by_schedule(
            repository.name, 'environment_dict_error_schedule'
        )

        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id == run_id
Example #13
    def test_basic_start_scheduled_execution_with_environment_dict_fn(
            self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={
                'scheduleName':
                'no_config_pipeline_hourly_schedule_with_config_fn'
            },
        )

        assert not result.errors
        assert result.data

        # just test existence
        assert_start_scheduled_execution_success(result)

        assert uuid.UUID(
            result.data['startScheduledExecution']['run']['runId'])
        assert (result.data['startScheduledExecution']['run']['pipeline']
                ['name'] == 'no_config_pipeline')

        assert any(
            tag['key'] == 'dagster/schedule_name' and tag['value'] ==
            'no_config_pipeline_hourly_schedule_with_config_fn'
            for tag in result.data['startScheduledExecution']['run']['tags'])
Example #14
    def test_retry_hard_failure(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, "hard_failer")
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": {"solids": {"hard_fail_or_0": {"config": {"fail": True}}}},
                }
            },
        )

        run_id = result.data["launchPipelineExecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(graphql_context, run_id)[
            "pipelineRunLogs"
        ]["messages"]

        assert step_started(logs, "hard_fail_or_0")
        assert step_did_not_run(logs, "hard_fail_or_0")
        assert step_did_not_run(logs, "increment")

        retry = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": {"solids": {"hard_fail_or_0": {"config": {"fail": False}}}},
                    "executionMetadata": {
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                    },
                }
            },
        )

        run_id = retry.data["launchPipelineReexecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(graphql_context, run_id)[
            "pipelineRunLogs"
        ]["messages"]
        assert step_did_succeed(logs, "hard_fail_or_0")
        assert step_did_succeed(logs, "increment")
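
The executionMetadata block for these resume-from-failure retries is identical each time; a hypothetical helper (not in the test module) that builds it, given that RESUME_RETRY_TAG is the tag key dagster checks in order to re-execute only failed steps and the steps downstream of them:

def resume_retry_metadata(parent_run_id, root_run_id=None):
    return {
        "rootRunId": root_run_id or parent_run_id,
        "parentRunId": parent_run_id,
        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
    }
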
Example #15
    def test_tick_success(self, graphql_context, snapshot):
        context = graphql_context
        instance = context.instance

        repository = context.legacy_get_repository_definition()

        instance.reconcile_scheduler_state(
            repository=repository,
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )
        schedule_def = repository.get_schedule_def("no_config_pipeline_hourly_schedule")

        start_time = time.time()
        execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': schedule_def.name},
        )

        # Check tick data and stats through gql
        result = execute_dagster_graphql_and_finish_runs(context, SCHEDULE_TICKS_QUERY)

        assert result.data
        schedule_result = next(
            schedule_result
            for schedule_result in result.data['scheduler']['runningSchedules']
            if schedule_result['scheduleDefinition']['name'] == schedule_def.name
        )

        assert schedule_result
        assert schedule_result['stats']['ticksSucceeded'] == 1
        snapshot.assert_match(schedule_result)

        # Check directly against the DB
        ticks = instance.get_schedule_ticks_by_schedule(repository.name, schedule_def.name)
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.schedule_name == schedule_def.name
        assert tick.cron_schedule == schedule_def.cron_schedule
        assert tick.timestamp > start_time and tick.timestamp < time.time()
        assert tick.status == ScheduleTickStatus.SUCCESS
        assert tick.run_id
Example #16
    def test_partition_based_decorator(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'partition_based_decorator'},
        )

        assert not result.errors
        assert result.data
        assert_start_scheduled_execution_success(result)
Example #17
    def test_query_multiple_schedule_ticks(self, graphql_context, snapshot):
        instance = graphql_context.instance
        repository = graphql_context.legacy_get_repository_definition()
        reconcile_scheduler_state("", "", repository, instance)

        for scheduleName in [
                'no_config_pipeline_hourly_schedule',
                'no_config_should_execute',
                'environment_dict_error_schedule',
        ]:
            execute_dagster_graphql_and_finish_runs(
                graphql_context,
                START_SCHEDULED_EXECUTION_QUERY,
                variables={'scheduleName': scheduleName},
            )

        result = execute_dagster_graphql_and_finish_runs(
            graphql_context, SCHEDULE_TICKS_QUERY)
        snapshot.assert_match(result.data['scheduler']['runningSchedules'])
Example #18
    def test_launch_scheduled_execution_with_should_execute(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_should_execute'},
        )

        assert not result.errors
        assert result.data

        assert result.data['launchScheduledExecution']['__typename'] == 'ScheduledExecutionBlocked'
Example #19
    def test_partition_based_execution(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'partition_based'},
        )

        assert not result.errors
        assert result.data

        # just test existence
        assert_launch_scheduled_execution_success(result)

        assert uuid.UUID(result.data['launchScheduledExecution']['run']['runId'])
        assert (
            result.data['launchScheduledExecution']['run']['pipeline']['name']
            == 'no_config_pipeline'
        )

        tags = result.data['launchScheduledExecution']['run']['tags']

        assert any(
            tag['key'] == 'dagster/schedule_name' and tag['value'] == 'partition_based'
            for tag in tags
        )

        assert any(tag['key'] == 'dagster/partition' and tag['value'] == '9' for tag in tags)
        assert any(
            tag['key'] == 'dagster/partition_set' and tag['value'] == 'scheduled_integer_partitions'
            for tag in tags
        )

        result_two = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'partition_based'},
        )
        tags = result_two.data['launchScheduledExecution']['run']['tags']
        # the last partition is selected on subsequent runs
        assert any(tag['key'] == 'dagster/partition' and tag['value'] == '9' for tag in tags)
Example #20
    def test_tagged_pipeline_schedule(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'tagged_pipeline_schedule'},
        )

        assert not result.errors
        assert_start_scheduled_execution_success(result)
        assert (result.data['startScheduledExecution']['run']['pipeline']
                ['name'] == 'tagged_pipeline')

        assert any(
            tag['key'] == 'foo' and tag['value'] == 'bar'
            for tag in result.data['startScheduledExecution']['run']['tags'])
Example #21
    def test_pipeline_reexecution_info_query(self, graphql_context, snapshot):
        context = graphql_context
        selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')

        run_id = make_new_run_id()
        execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_PIPELINE_EXECUTION_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'runConfigData': csv_hello_world_solids_config_fs_storage(),
                    'executionMetadata': {'runId': run_id},
                    'mode': 'default',
                }
            },
        )

        # retry
        new_run_id = make_new_run_id()
        execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_PIPELINE_REEXECUTION_SNAPSHOT_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'runConfigData': csv_hello_world_solids_config_fs_storage(),
                    'stepKeys': ['sum_sq_solid.compute'],
                    'executionMetadata': {
                        'runId': new_run_id,
                        'rootRunId': run_id,
                        'parentRunId': run_id,
                        'tags': [{'key': RESUME_RETRY_TAG, 'value': 'true'}],
                    },
                    'mode': 'default',
                }
            },
        )

        result_one = execute_dagster_graphql_and_finish_runs(
            context, PIPELINE_REEXECUTION_INFO_QUERY, variables={'runId': run_id}
        )
        query_result_one = result_one.data['pipelineRunOrError']
        assert query_result_one['__typename'] == 'PipelineRun'
        assert query_result_one['stepKeysToExecute'] is None

        result_two = execute_dagster_graphql_and_finish_runs(
            context, PIPELINE_REEXECUTION_INFO_QUERY, variables={'runId': new_run_id}
        )
        query_result_two = result_two.data['pipelineRunOrError']
        assert query_result_two['__typename'] == 'PipelineRun'
        stepKeysToExecute = query_result_two['stepKeysToExecute']
        assert stepKeysToExecute is not None
        snapshot.assert_match(stepKeysToExecute)
Example #22
    def test_pipeline_reexecution_info_query(self, graphql_context, snapshot):
        context = graphql_context
        selector = infer_pipeline_selector(graphql_context, "csv_hello_world")

        run_id = make_new_run_id()
        execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": csv_hello_world_solids_config(),
                    "executionMetadata": {"runId": run_id},
                    "mode": "default",
                }
            },
        )

        # retry
        new_run_id = make_new_run_id()
        execute_dagster_graphql_and_finish_runs(
            context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": csv_hello_world_solids_config(),
                    "stepKeys": ["sum_sq_solid"],
                    "executionMetadata": {
                        "runId": new_run_id,
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                    },
                    "mode": "default",
                }
            },
        )

        result_one = execute_dagster_graphql_and_finish_runs(
            context, PIPELINE_REEXECUTION_INFO_QUERY, variables={"runId": run_id}
        )
        query_result_one = result_one.data["pipelineRunOrError"]
        assert query_result_one["__typename"] == "Run"
        assert query_result_one["stepKeysToExecute"] is None

        result_two = execute_dagster_graphql_and_finish_runs(
            context, PIPELINE_REEXECUTION_INFO_QUERY, variables={"runId": new_run_id}
        )
        query_result_two = result_two.data["pipelineRunOrError"]
        assert query_result_two["__typename"] == "Run"
        stepKeysToExecute = query_result_two["stepKeysToExecute"]
        assert stepKeysToExecute is not None
        snapshot.assert_match(stepKeysToExecute)
Example #23
def _do_retry_intermediates_test(graphql_context, run_id, reexecution_run_id):
    selector = get_legacy_pipeline_selector(graphql_context,
                                            'eventually_successful')
    logs = sync_execute_get_events(
        context=graphql_context,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': selector,
                'executionMetadata': {
                    'runId': run_id
                },
            }
        },
    )

    assert step_did_succeed(logs, 'spawn.compute')
    assert step_did_fail(logs, 'fail.compute')
    assert step_did_skip(logs, 'fail_2.compute')
    assert step_did_skip(logs, 'fail_3.compute')
    assert step_did_skip(logs, 'reset.compute')

    retry_one = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        START_PIPELINE_REEXECUTION_QUERY,
        variables={
            'executionParams': {
                'mode': 'default',
                'selector': selector,
                'executionMetadata': {
                    'runId': reexecution_run_id,
                    'rootRunId': run_id,
                    'parentRunId': run_id,
                    'tags': [{
                        'key': RESUME_RETRY_TAG,
                        'value': 'true'
                    }],
                },
            }
        },
    )

    return retry_one
Example #24
def _do_retry_intermediates_test(graphql_context, run_id, reexecution_run_id):
    selector = infer_pipeline_selector(graphql_context,
                                       "eventually_successful")
    logs = sync_execute_get_events(
        context=graphql_context,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "executionMetadata": {
                    "runId": run_id
                },
            }
        },
    )

    assert step_did_succeed(logs, "spawn.compute")
    assert step_did_fail(logs, "fail.compute")
    assert step_did_skip(logs, "fail_2.compute")
    assert step_did_skip(logs, "fail_3.compute")
    assert step_did_skip(logs, "reset.compute")

    retry_one = execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "executionMetadata": {
                    "runId": reexecution_run_id,
                    "rootRunId": run_id,
                    "parentRunId": run_id,
                    "tags": [{
                        "key": RESUME_RETRY_TAG,
                        "value": "true"
                    }],
                },
            }
        },
    )

    return retry_one
Example #25
    def test_partition_based_multi_mode_decorator(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'partition_based_multi_mode_decorator'},
        )

        assert not result.errors
        assert result.data
        assert_start_scheduled_execution_success(result)
        run_id = result.data['startScheduledExecution']['run']['runId']

        logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, run_id)['pipelineRunLogs']['messages']
        execution_step_names = [
            log['step']['key'] for log in logs
            if log['__typename'] == 'ExecutionStepStartEvent'
        ]
        assert execution_step_names == ['return_six.compute']
Example #26
    def test_get_partition_runs(self, graphql_context):
        repository_selector = infer_repository_selector(graphql_context)
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": {
                        "repositorySelector": repository_selector,
                        "partitionSetName": "integer_partition",
                    },
                    "partitionNames": ["2", "3"],
                    "forceSynchronousSubmission": True,
                }
            },
        )
        assert not result.errors
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 2
        run_ids = result.data["launchPartitionBackfill"]["launchedRunIds"]

        result = execute_dagster_graphql(
            graphql_context,
            query=GET_PARTITION_SET_RUNS_QUERY,
            variables={
                "partitionSetName": "integer_partition",
                "repositorySelector": repository_selector,
            },
        )
        assert not result.errors
        assert result.data
        partitions = result.data["partitionSetOrError"]["partitionsOrError"][
            "results"]
        assert len(partitions) == 10
        for partition in partitions:
            if partition["name"] not in ("2", "3"):
                assert len(partition["runs"]) == 0
            else:
                assert len(partition["runs"]) == 1
                assert partition["runs"][0]["runId"] in run_ids
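
The assertions only touch partition names and the run ids attached to each partition, so GET_PARTITION_SET_RUNS_QUERY plausibly has a shape like this sketch (union and result type names are assumptions):

GET_PARTITION_SET_RUNS_QUERY = """
query($repositorySelector: RepositorySelector!, $partitionSetName: String!) {
  partitionSetOrError(repositorySelector: $repositorySelector, partitionSetName: $partitionSetName) {
    ... on PartitionSet {
      partitionsOrError {
        ... on Partitions {
          results {
            name
            runs {
              runId
            }
          }
        }
      }
    }
  }
}
"""
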
Example #27
    def test_launch_partial_backfill(self, graphql_context):
        # launch a backfill that re-executes only a subset of steps, without running the full pipeline first
        repository_selector = infer_repository_selector(graphql_context)
        partition_set_selector = {
            "repositorySelector": repository_selector,
            "partitionSetName": "chained_integer_partition",
        }

        # reexecute a partial pipeline
        partial_steps = ["after_failure"]
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": partition_set_selector,
                    "partitionNames": ["2", "3"],
                    "reexecutionSteps": partial_steps,
                }
            },
        )
        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        backfill_id = result.data["launchPartitionBackfill"]["backfillId"]

        result = execute_dagster_graphql(graphql_context,
                                         PARTITION_PROGRESS_QUERY,
                                         variables={"backfillId": backfill_id})

        assert not result.errors
        assert result.data
        assert result.data["partitionBackfillOrError"][
            "__typename"] == "PartitionBackfill"
        assert result.data["partitionBackfillOrError"]["status"] == "REQUESTED"
        assert result.data["partitionBackfillOrError"]["isPersisted"]
        assert result.data["partitionBackfillOrError"]["numRequested"] == 0
        assert result.data["partitionBackfillOrError"]["numTotal"] == 2
        assert result.data["partitionBackfillOrError"]["reexecutionSteps"] == [
            "after_failure"
        ]
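
PARTITION_PROGRESS_QUERY only needs the backfill bookkeeping fields asserted above; a plausible sketch, with the union member names assumed:

PARTITION_PROGRESS_QUERY = """
query($backfillId: String!) {
  partitionBackfillOrError(backfillId: $backfillId) {
    __typename
    ... on PartitionBackfill {
      status
      isPersisted
      numRequested
      numTotal
      reexecutionSteps
    }
    ... on PythonError {
      message
    }
  }
}
"""
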
Example #28
    def test_tagged_pipeline_scheduled_execution_with_run_launcher(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'tagged_pipeline_schedule'},
        )

        assert not result.errors
        assert result.data

        # just test existence
        assert_launch_scheduled_execution_success(result)

        assert uuid.UUID(result.data['launchScheduledExecution']['run']['runId'])
        assert (
            result.data['launchScheduledExecution']['run']['pipeline']['name'] == 'tagged_pipeline'
        )

        assert any(
            tag['key'] == 'foo' and tag['value'] == 'bar'
            for tag in result.data['launchScheduledExecution']['run']['tags']
        )
Example #29
    def test_just_basic_launch_scheduled_execution(self, graphql_context):
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )

        assert not result.errors
        assert result.data
        assert_launch_scheduled_execution_success(result)

        assert uuid.UUID(result.data['launchScheduledExecution']['run']['runId'])
        assert (
            result.data['launchScheduledExecution']['run']['pipeline']['name']
            == 'no_config_pipeline'
        )

        assert any(
            tag['key'] == 'dagster/schedule_name'
            and tag['value'] == 'no_config_pipeline_hourly_schedule'
            for tag in result.data['launchScheduledExecution']['run']['tags']
        )
Example #30
    def test_retry_pipeline_execution(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context,
                                           "eventually_successful")
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": retry_config(0),
                }
            },
        )

        run_id = result.data["launchPipelineExecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, run_id)["pipelineRunLogs"]["messages"]

        assert step_did_succeed(logs, "spawn.compute")
        assert step_did_fail(logs, "fail.compute")
        assert step_did_skip(logs, "fail_2.compute")
        assert step_did_skip(logs, "fail_3.compute")
        assert step_did_skip(logs, "reset.compute")

        retry_one = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": retry_config(1),
                    "executionMetadata": {
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{
                            "key": RESUME_RETRY_TAG,
                            "value": "true"
                        }],
                    },
                }
            },
        )

        run_id = retry_one.data["launchPipelineReexecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, run_id)["pipelineRunLogs"]["messages"]
        assert step_did_not_run(logs, "spawn.compute")
        assert step_did_succeed(logs, "fail.compute")
        assert step_did_fail(logs, "fail_2.compute")
        assert step_did_skip(logs, "fail_3.compute")
        assert step_did_skip(logs, "reset.compute")

        retry_two = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": retry_config(2),
                    "executionMetadata": {
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{
                            "key": RESUME_RETRY_TAG,
                            "value": "true"
                        }],
                    },
                }
            },
        )

        run_id = retry_two.data["launchPipelineReexecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, run_id)["pipelineRunLogs"]["messages"]

        assert step_did_not_run(logs, "spawn.compute")
        assert step_did_not_run(logs, "fail.compute")
        assert step_did_succeed(logs, "fail_2.compute")
        assert step_did_fail(logs, "fail_3.compute")
        assert step_did_skip(logs, "reset.compute")

        retry_three = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": retry_config(3),
                    "executionMetadata": {
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{
                            "key": RESUME_RETRY_TAG,
                            "value": "true"
                        }],
                    },
                }
            },
        )

        run_id = retry_three.data["launchPipelineReexecution"]["run"]["runId"]
        logs = get_all_logs_for_finished_run_via_subscription(
            graphql_context, run_id)["pipelineRunLogs"]["messages"]

        assert step_did_not_run(logs, "spawn.compute")
        assert step_did_not_run(logs, "fail.compute")
        assert step_did_not_run(logs, "fail_2.compute")
        assert step_did_succeed(logs, "fail_3.compute")
        assert step_did_succeed(logs, "reset.compute")