Example #1
def test_memoization_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    ephemeral_prefix = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )
    run_config = deep_merge_dicts(
        run_config,
        {
            "resources": {
                "io_manager": {
                    "config": {
                        "s3_prefix": ephemeral_prefix
                    }
                }
            }
        },
    )

    try:

        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url,
                run_config=run_config,
                pipeline_name="memoization_pipeline",
                mode="celery",
            )

            result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                                   run_id,
                                                   namespace=helm_namespace)

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
                result)

            run_ids.append(run_id)

        unmemoized_run_id = run_ids[0]
        step_events = _get_step_events(
            dagster_instance.all_logs(unmemoized_run_id))
        assert len(step_events) == 4

        memoized_run_id = run_ids[1]
        step_events = _get_step_events(
            dagster_instance.all_logs(memoized_run_id))
        assert len(step_events) == 0

    finally:
        cleanup_memoized_results(define_memoization_pipeline(), "celery",
                                 dagster_instance, run_config)
Example #2
def daily_weather_ingest_schedule(date):
    unix_seconds_since_epoch = int(
        (date - datetime(year=1970, month=1, day=1)).total_seconds())
    return deep_merge_dicts(
        {
            "solids": {
                "weather_etl": {
                    "solids": {
                        "download_weather_report_from_weather_api": {
                            "inputs": {
                                "epoch_date": {
                                    "value": unix_seconds_since_epoch
                                }
                            }
                        },
                    },
                }
            }
        },
        {
            "resources": weather_etl_environment["resources"],
            "solids": {
                "weather_etl": weather_etl_environment["solids"]["weather_etl"]
            },
        },
    )
Example #3
def monthly_trip_ingest_schedule(date):
    return deep_merge_dicts(
        {
            "solids": {
                "trip_etl": {
                    "solids": {
                        "download_baybike_zipfile_from_url": {
                            "inputs": {
                                "file_name": {
                                    "value":
                                    "{}-fordgobike-tripdata.csv.zip".format(
                                        date.date().strftime("%Y%m"))
                                }
                            }
                        }
                    }
                }
            }
        },
        {
            "resources": trip_etl_environment["resources"],
            "solids": {
                "trip_etl": trip_etl_environment["solids"]["trip_etl"]
            },
        },
    )
Example #4
    def update_job(self, job_id: int, **kwargs) -> Dict[str, Any]:
        """
        Updates specific properties of a dbt job. Documentation on the full set of potential
        parameters can be found here: https://docs.getdbt.com/dbt-cloud/api-v2#operation/updateJobById

        Args:
            job_id (int): The ID of the relevant dbt Cloud job. You can find this value by going to
                the details page of your job in the dbt Cloud UI. It will be the final number in the
                url, e.g.: ``https://cloud.getdbt.com/#/accounts/{account_id}/projects/{project_id}/jobs/{job_id}/``
            kwargs: Passed in as the properties to be changed.
        Returns:
            Dict[str, Any]: Parsed json data from the response to this request

        Examples:

        .. code-block:: python

            # disable schedule for job with id=12345
            my_dbt_cloud_resource.update_job(12345, triggers={"schedule": False})
        """
        # API requires you to supply a bunch of values, so we can just use the current state
        # as the defaults
        job_data = self.get_job(job_id)
        return self.make_request("POST",
                                 f"{self._account_id}/jobs/{job_id}/",
                                 data=deep_merge_dicts(job_data, kwargs))
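A brief, hypothetical follow-up to the docstring example above (my_dbt_cloud_resource and the job id are placeholders, just as in the docstring's own example): because the current job state is fetched via get_job and deep-merged underneath the supplied kwargs, any subset of properties can be changed in a single call.

# Hypothetical usage only: rename the job and adjust its timeout; every other
# property keeps the value returned by get_job().
my_dbt_cloud_resource.update_job(
    12345,
    name="nightly-build",
    execution={"timeout_seconds": 3600},
)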
Example #5
def daily_weather_ingest_schedule(date):
    unix_seconds_since_epoch = int(
        (date - datetime(year=1970, month=1, day=1)).total_seconds())
    return deep_merge_dicts(
        {
            'solids': {
                'weather_etl': {
                    'solids': {
                        'download_weather_report_from_weather_api': {
                            'inputs': {
                                'epoch_date': {
                                    'value': unix_seconds_since_epoch
                                }
                            }
                        },
                    },
                }
            }
        },
        {
            'resources': weather_etl_environment['resources'],
            'solids': {
                'weather_etl': weather_etl_environment['solids']['weather_etl']
            },
        },
    )
Example #6
def test_pyspark_databricks(mock_wait, mock_get_step_events, mock_put_file,
                            mock_submit_run):
    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode='prod_s3',
        environment_dict={
            'resources': {
                'pyspark_step_launcher': {
                    'config':
                    deep_merge_dicts(
                        BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG,
                        {
                            'databricks_host': '',
                            'databricks_token': ''
                        },
                    ),
                },
            },
        },
    )
    assert result.success
    assert mock_wait.call_count == 1
    assert mock_get_step_events.call_count == 1
    # TODO: revisit this assertion's expected value once uploaded packages are more stable
    assert mock_put_file.call_count == 3
    assert mock_submit_run.call_count == 1
Example #7
def make_environment_dict(scratch_dir, mode):
    if mode in ['external', 'request_retry']:
        step_launcher_resource_keys = [
            'first_step_launcher', 'second_step_launcher'
        ]
    else:
        step_launcher_resource_keys = ['second_step_launcher']
    return deep_merge_dicts(
        ENVIRONMENT_DICT_BASE,
        {
            'resources': {
                step_launcher_resource_key: {
                    'config': {
                        'scratch_dir': scratch_dir
                    }
                }
                for step_launcher_resource_key in step_launcher_resource_keys
            },
            'storage': {
                'filesystem': {
                    'config': {
                        'base_dir': scratch_dir
                    }
                }
            },
        },
    )
Example #8
def test_pyspark_databricks(mock_wait, mock_get_step_events, mock_put_file,
                            mock_submit_run):
    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="prod_s3",
        run_config={
            "resources": {
                "pyspark_step_launcher": {
                    "config":
                    deep_merge_dicts(
                        BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG,
                        {
                            "databricks_host": "",
                            "databricks_token": ""
                        },
                    ),
                },
            },
        },
    )
    assert result.success
    assert mock_wait.call_count == 1
    assert mock_get_step_events.call_count == 1
    # TODO: revisit this assertion's expected value once uploaded packages are more stable
    assert mock_put_file.call_count == 3
    assert mock_submit_run.call_count == 1
Example #9
def make_run_config(scratch_dir, mode):
    if mode in ['external', 'request_retry']:
        step_launcher_resource_keys = [
            'first_step_launcher', 'second_step_launcher'
        ]
    else:
        step_launcher_resource_keys = ['second_step_launcher']
    return deep_merge_dicts(
        RUN_CONFIG_BASE,
        {
            'resources': {
                step_launcher_resource_key: {
                    'config': {
                        'scratch_dir': scratch_dir
                    }
                }
                for step_launcher_resource_key in step_launcher_resource_keys
            },
            'storage': {
                'filesystem': {
                    'config': {
                        'base_dir': scratch_dir
                    }
                }
            },
        },
    )
Example #10
def monthly_trip_ingest_schedule(date):
    return deep_merge_dicts(
        {
            'solids': {
                'trip_etl': {
                    'solids': {
                        'download_baybike_zipfile_from_url': {
                            'inputs': {
                                'file_name': {
                                    'value':
                                    '{}-fordgobike-tripdata.csv.zip'.format(
                                        date.date().strftime('%Y%m'))
                                }
                            }
                        }
                    }
                }
            }
        },
        {
            'resources': trip_etl_environment['resources'],
            'solids': {
                'trip_etl': trip_etl_environment['solids']['trip_etl']
            },
        },
    )
Example #11
def sample_runs_details(include_related=None, **kwargs):
    runs = [sample_run_details(include_related, **kwargs) for i in range(100)]
    if include_related and "environment" in include_related:
        for run in runs:
            run["environment"] = {
                "dbt_project_subdirectory": None,
                "project_id": 50000,
                "id": 47000,
                "account_id": SAMPLE_ACCOUNT_ID,
                "connection_id": 56000,
                "repository_id": 58000,
                "credentials_id": 52000,
                "created_by_id": None,
                "name": "dbt-environment",
                "use_custom_branch": False,
                "custom_branch": None,
                "dbt_version": "0.21.0",
                "supports_docs": False,
                "state": 10,
            }
            run = deep_merge_dicts(run, kwargs)
    return {
        "status": {
            "code": 200,
            "is_success": True,
            "user_message": "Success!",
            "developer_message": "",
        },
        "data": runs,
    }
Example #12
def test_realistic():
    from_dict = {
        'context': {
            'unittest': {
                'resources': {
                    'db_resource': {'config': {'user': '******', 'password': '******'}}
                }
            }
        }
    }

    onto_dict = {'context': {'unittest': {'resources': {'another': {'config': 'not_sensitive'}}}}}

    result_dict = {
        'context': {
            'unittest': {
                'resources': {
                    'db_resource': {'config': {'user': '******', 'password': '******'}},
                    'another': {'config': 'not_sensitive'},
                }
            }
        }
    }

    assert deep_merge_dicts(onto_dict, from_dict) == result_dict
Example #13
def test_realistic():
    from_dict = {
        "context": {
            "unittest": {
                "resources": {
                    "db_resource": {"config": {"user": "******", "password": "******"}}
                }
            }
        }
    }

    onto_dict = {"context": {"unittest": {"resources": {"another": {"config": "not_sensitive"}}}}}

    result_dict = {
        "context": {
            "unittest": {
                "resources": {
                    "db_resource": {"config": {"user": "******", "password": "******"}},
                    "another": {"config": "not_sensitive"},
                }
            }
        }
    }

    assert deep_merge_dicts(onto_dict, from_dict) == result_dict
Example #14
def make_run_config(scratch_dir, mode):
    if mode in ["external", "request_retry"]:
        step_launcher_resource_keys = [
            "first_step_launcher", "second_step_launcher"
        ]
    else:
        step_launcher_resource_keys = ["second_step_launcher"]
    return deep_merge_dicts(
        RUN_CONFIG_BASE,
        {
            "resources":
            merge_dicts(
                {"io_manager": {
                    "config": {
                        "base_dir": scratch_dir
                    }
                }},
                {
                    step_launcher_resource_key: {
                        "config": {
                            "scratch_dir": scratch_dir
                        }
                    }
                    for step_launcher_resource_key in
                    step_launcher_resource_keys
                },
            ),
        },
    )
Example #15
def test_pyspark_databricks(mock_wait, mock_get_step_events, mock_put_file,
                            mock_submit_run):
    mock_get_step_events.return_value = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="local").events_by_step_key["do_nothing_solid"]

    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="prod_s3",
        run_config={
            "resources": {
                "pyspark_step_launcher": {
                    "config":
                    deep_merge_dicts(
                        BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG,
                        {
                            "databricks_host": "",
                            "databricks_token": ""
                        },
                    ),
                },
            },
        },
    )
    assert result.success
    assert mock_wait.call_count == 1
    assert mock_get_step_events.call_count == 1
    assert mock_put_file.call_count == 4
    assert mock_submit_run.call_count == 1
Example #16
def test_nested_merge():
    from_dict = {"key": {"nested_one": 1}}

    onto_dict = {"key": {"nested_two": 2}}

    assert deep_merge_dicts(onto_dict, from_dict) == {
        "key": {
            "nested_one": 1,
            "nested_two": 2
        }
    }
Example #17
    def run_pipeline(self,
                     pipeline: PipelineDefinition,
                     config_name: str,
                     extra_config: Optional[dict[str, Any]] = None,
                     pipeline_mode='test') -> PipelineExecutionResult:
        config_dict = load_yaml_from_globs(config_path(config_name))
        config_dict = deep_merge_dicts(config_dict, extra_config or {})

        return execute_pipeline(pipeline,
                                run_config=config_dict,
                                mode=pipeline_mode)
Example #18
def test_pyspark_databricks(mock_get_run_state, mock_get_step_events,
                            mock_put_file, mock_read_file, mock_submit_run):
    mock_submit_run.return_value = 12345
    mock_read_file.return_value = "somefilecontents".encode()

    running_state = DatabricksRunState(DatabricksRunLifeCycleState.Running,
                                       None, "")
    final_state = DatabricksRunState(DatabricksRunLifeCycleState.Terminated,
                                     DatabricksRunResultState.Success, "")
    mock_get_run_state.side_effect = [running_state] * 5 + [final_state]

    with instance_for_test() as instance:
        execute_pipeline(pipeline=reconstructable(define_do_nothing_pipe),
                         mode="local",
                         instance=instance)
        mock_get_step_events.return_value = [
            record.event_log_entry for record in instance.get_event_records()
            if record.event_log_entry.step_key == "do_nothing_solid"
        ]
    config = BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG.copy()
    config.pop("local_pipeline_package_path")
    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="test",
        run_config={
            "resources": {
                "pyspark_step_launcher": {
                    "config":
                    deep_merge_dicts(
                        config,
                        {
                            "databricks_host": "",
                            "databricks_token": "",
                            "poll_interval_sec": 0.1,
                            "local_dagster_job_package_path":
                            os.path.abspath(os.path.dirname(__file__)),
                        },
                    ),
                },
            },
        },
    )
    assert result.success
    assert mock_get_run_state.call_count == 6
    assert mock_get_step_events.call_count == 6
    assert mock_put_file.call_count == 4
    assert mock_read_file.call_count == 2
    assert mock_submit_run.call_count == 1
Example #19
    def with_additional_config(self, run_config):
        """Return a new PresetDefinition with additional config merged in to the existing config."""

        check.opt_nullable_dict_param(run_config, "run_config")
        if run_config is None:
            return self
        else:
            return PresetDefinition(
                name=self.name,
                solid_selection=self.solid_selection,
                mode=self.mode,
                tags=self.tags,
                run_config=deep_merge_dicts(self.run_config, run_config),
            )
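A short usage sketch for the method above (hypothetical: base_preset is an assumed PresetDefinition instance, not something defined in this listing). Because the override is deep-merged onto the preset's existing run_config, keys that are not mentioned survive unchanged.

# Hypothetical usage: layer a single resource override onto an existing preset.
overridden_preset = base_preset.with_additional_config(
    {"resources": {"io_manager": {"config": {"s3_prefix": "scratch-prefix"}}}}
)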
Example #20
def test_pyspark_emr(mock_is_emr_step_complete, mock_read_events,
                     mock_s3_bucket):
    mock_read_events.return_value = execute_pipeline(
        reconstructable(define_do_nothing_pipe),
        mode="local").events_by_step_key["do_nothing_solid"]

    run_job_flow_args = dict(
        Instances={
            "InstanceCount": 1,
            "KeepJobFlowAliveWhenNoSteps": True,
            "MasterInstanceType": "c3.medium",
            "Placement": {
                "AvailabilityZone": "us-west-1a"
            },
            "SlaveInstanceType": "c3.xlarge",
        },
        JobFlowRole="EMR_EC2_DefaultRole",
        LogUri="s3://{bucket}/log".format(bucket=mock_s3_bucket.name),
        Name="cluster",
        ServiceRole="EMR_DefaultRole",
        VisibleToAllUsers=True,
    )

    # Doing cluster setup outside of a solid here, because run_job_flow is not yet plumbed through
    # to the pyspark EMR resource.
    job_runner = EmrJobRunner(region="us-west-1")
    context = create_test_pipeline_execution_context()
    cluster_id = job_runner.run_job_flow(context.log, run_job_flow_args)

    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="prod",
        run_config={
            "resources": {
                "pyspark_step_launcher": {
                    "config":
                    deep_merge_dicts(
                        BASE_EMR_PYSPARK_STEP_LAUNCHER_CONFIG,
                        {
                            "cluster_id": cluster_id,
                            "staging_bucket": mock_s3_bucket.name
                        },
                    ),
                }
            },
        },
    )
    assert result.success
    assert mock_is_emr_step_complete.called
Example #21
def run_config_storage_field_backcompat(run_config):
    """This method will be removed after "storage" is removed in run config.

    For backwards compatibility, we treat "storage" as an alias of "intermediate_storage", i.e.
    run config that has been passed in through the "storage" entry will be used to define intermediate
    storage. When "storage" and "intermediate_storage" are both specified, intermediate storage config
    will override storage config.

    Tracking https://github.com/dagster-io/dagster/issues/3280
    """

    intermediate_storage_dict = {}
    if run_config.get("storage"):
        intermediate_storage_dict = {
            "intermediate_storage":
            run_config.get("intermediate_storage") or run_config.get("storage")
        }
    return deep_merge_dicts(run_config, intermediate_storage_dict)
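To make the docstring concrete, here is a small hedged illustration. The input dicts are invented, but the outcomes follow from the code above together with the merge semantics exercised by the tests elsewhere in this listing.

# "storage" alone: its config is copied into "intermediate_storage".
legacy = {"storage": {"filesystem": {"config": {"base_dir": "/tmp"}}}}
assert run_config_storage_field_backcompat(legacy) == {
    "storage": {"filesystem": {"config": {"base_dir": "/tmp"}}},
    "intermediate_storage": {"filesystem": {"config": {"base_dir": "/tmp"}}},
}

# Both present: the explicit "intermediate_storage" entry wins.
both = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}
assert run_config_storage_field_backcompat(both)["intermediate_storage"] == {
    "in_memory": {}
}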
Example #22
def test_pyspark_emr(mock_wait, mock_get_step_events):
    run_job_flow_args = dict(
        Instances={
            'InstanceCount': 1,
            'KeepJobFlowAliveWhenNoSteps': True,
            'MasterInstanceType': 'c3.medium',
            'Placement': {
                'AvailabilityZone': 'us-west-1a'
            },
            'SlaveInstanceType': 'c3.xlarge',
        },
        JobFlowRole='EMR_EC2_DefaultRole',
        LogUri='s3://mybucket/log',
        Name='cluster',
        ServiceRole='EMR_DefaultRole',
        VisibleToAllUsers=True,
    )

    # Doing cluster setup outside of a solid here, because run_job_flow is not yet plumbed through
    # to the pyspark EMR resource.
    job_runner = EmrJobRunner(region='us-west-1')
    context = create_test_pipeline_execution_context()
    cluster_id = job_runner.run_job_flow(context.log, run_job_flow_args)

    pipeline_def = ExecutionTargetHandle.for_pipeline_fn(
        define_do_nothing_pipe).build_pipeline_definition()
    result = execute_pipeline(
        pipeline=pipeline_def,
        mode='prod',
        environment_dict={
            'resources': {
                'pyspark_step_launcher': {
                    'config':
                    deep_merge_dicts(BASE_EMR_PYSPARK_STEP_LAUNCHER_CONFIG,
                                     {'cluster_id': cluster_id}),
                }
            },
        },
    )
    assert result.success
    assert mock_wait.call_count == 1
    assert mock_get_step_events.call_count == 1
Example #23
def test_volume_mounts(dagster_docker_image, dagster_instance, helm_namespace,
                       dagit_url):
    run_config = deep_merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="volume_mount_pipeline",
        mode="celery",
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #24
def run_config_storage_field_backcompat(run_config):
    """This method will be removed after "storage" is removed in run config.

    For backwards compatibility, we treat "storage" as an alias of "intermediate_storage", i.e.
    run config that has been passed in through the "storage" entry will be used to define intermediate
    storage. When "storage" and "intermediate_storage" are both specified, intermediate storage config
    will override storage config.

    Tracking https://github.com/dagster-io/dagster/issues/3280
    """

    intermediate_storage_dict = {}
    if run_config.get("storage"):
        warnings.warn((
            'the "storage" entry in the run config is deprecated and will removed in the '
            'dagster 0.11.0 release. Please use "intermediate_storage" instead and update '
            "the corresponding `system_storage_defs` argument in `ModeDefinition` to "
            "`intermediate_storage_defs`."))
        intermediate_storage_dict = {
            "intermediate_storage":
            run_config.get("intermediate_storage") or run_config.get("storage")
        }
    return deep_merge_dicts(run_config, intermediate_storage_dict)
Example #25
def get_sample_connector_response(**kwargs):
    return deep_merge_dicts(
        {
            "code": "Success",
            "data": {
                "id": DEFAULT_CONNECTOR_ID,
                "group_id": "some_group",
                "service": "some_service",
                "service_version": 1,
                "schema": "some_service.some_name",
                "connected_by": "some_user",
                "created_at": "2021-01-01T00:00:00.0Z",
                "succeeded_at": "2021-01-01T01:00:00.0Z",
                "failed_at": None,
                "paused": False,
                "pause_after_trial": False,
                "sync_frequency": 360,
                "schedule_type": "auto",
                "status": {
                    "setup_state": "connected",
                    "sync_state": "scheduled",
                    "update_state": "on_schedule",
                    "is_historical_sync": False,
                    "tasks": [],
                    "warnings": [],
                },
                "config": {
                    "auth_type": "OAuth",
                },
                "source_sync_details": {
                    "last_synced": "2021-10-27T16:58:40.035Z"
                },
            },
        },
        kwargs,
    )
Example #26
def test_simple_merge():
    assert deep_merge_dicts({}, {}) == {}
    assert deep_merge_dicts({1: 2}, {}) == {1: 2}
    assert deep_merge_dicts({}, {1: 2}) == {1: 2}
Example #27
def test_smash():
    from_dict = {"value": "smasher"}
    onto_dict = {"value": "got_smashed"}

    assert deep_merge_dicts(onto_dict, from_dict)["value"] == "smasher"
Example #28
def test_nested_merge():
    from_dict = {'key': {'nested_one': 1}}

    onto_dict = {'key': {'nested_two': 2}}

    assert deep_merge_dicts(onto_dict, from_dict) == {'key': {'nested_one': 1, 'nested_two': 2}}
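Taken together, the small tests above (Examples #16 and #26-#28) pin down the behavior the larger examples rely on: keys from both dicts are kept, nested dicts are merged recursively, and on a conflict the value from from_dict (the second argument) wins. A minimal sketch with those semantics follows; it is an assumption for illustration, not Dagster's implementation, which may differ in details such as argument checking or copying.

# Sketch only: recursively merge from_dict onto onto_dict, letting from_dict
# win on non-dict conflicts and returning a new dict instead of mutating
# either argument.
def deep_merge_dicts_sketch(onto_dict, from_dict):
    result = dict(onto_dict)
    for key, from_value in from_dict.items():
        onto_value = result.get(key)
        if isinstance(onto_value, dict) and isinstance(from_value, dict):
            result[key] = deep_merge_dicts_sketch(onto_value, from_value)
        else:
            result[key] = from_value
    return result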
Example #29
def sample_run_details(include_related=None, **kwargs):
    base_data = {
        "id": SAMPLE_RUN_ID,
        "trigger_id": 33000000,
        "account_id": SAMPLE_ACCOUNT_ID,
        "environment_id": 47000,
        "project_id": 50000,
        "job_definition_id": SAMPLE_JOB_ID,
        "status": 10,
        "dbt_version": "0.21.0",
        "git_branch": "master",
        "git_sha": "a32e8239326887421f1314ee0890e5174c8f644a",
        "status_message": None,
        "owner_thread_id": None,
        "executed_by_thread_id": "dbt-run-3000000-6v5pm",
        "deferring_run_id": None,
        "artifacts_saved": True,
        "artifact_s3_path": "prod/runs/3000000/artifacts/target",
        "has_docs_generated": False,
        "has_sources_generated": False,
        "notifications_sent": True,
        "blocked_by": [],
        "scribe_enabled": True,
        "created_at": "2021-11-01 22:47:48.501943+00:00",
        "updated_at": "2021-11-01 22:48:44.860334+00:00",
        "dequeued_at": "2021-11-01 22:48:22.880352+00:00",
        "started_at": "2021-11-01 22:48:28.595439+00:00",
        "finished_at": "2021-11-01 22:48:44.684263+00:00",
        "last_checked_at": None,
        "last_heartbeat_at": None,
        "should_start_at": "2021-11-01 22:47:48.501943+00:00",
        "trigger": None,
        "job": None,
        "environment": None,
        "run_steps": [],
        "status_humanized": "Success",
        "in_progress": False,
        "is_complete": True,
        "is_success": True,
        "is_error": False,
        "is_cancelled": False,
        "href": "https://cloud.getdbt.com/#/accounts/30000/projects/50000/runs/3000000/",
        "duration": "00:00:56",
        "queued_duration": "00:00:40",
        "run_duration": "00:00:16",
        "duration_humanized": "56 seconds",
        "queued_duration_humanized": "40 seconds",
        "run_duration_humanized": "16 seconds",
        "created_at_humanized": "26 minutes, 12 seconds ago",
        "finished_at_humanized": "25 minutes, 16 seconds ago",
        "job_id": SAMPLE_JOB_ID,
    }
    if include_related:
        if "trigger" in include_related:
            base_data["trigger"] = {
                "id": 33149624,
                "cause": "Triggered via Dagster",
                "job_definition_id": SAMPLE_JOB_ID,
                "git_branch": None,
                "git_sha": None,
                "github_pull_request_id": None,
                "gitlab_merge_request_id": None,
                "schema_override": None,
                "dbt_version_override": None,
                "threads_override": None,
                "target_name_override": None,
                "generate_docs_override": None,
                "timeout_seconds_override": None,
                "steps_override": None,
                "created_at": "2021-11-01 22:47:48.494450+00:00",
                "cause_humanized": "Triggered via Dagster",
                "job": None,
            }
        if "job" in include_related:
            base_data["job"] = {
                "execution": {"timeout_seconds": 0},
                "generate_docs": False,
                "run_generate_sources": False,
                "id": SAMPLE_JOB_ID,
                "account_id": SAMPLE_ACCOUNT_ID,
                "project_id": 50000,
                "environment_id": 47071,
                "name": "MyCoolJob",
                "dbt_version": None,
                "created_at": "2021-10-29T21:35:33.278228Z",
                "updated_at": "2021-11-01T23:03:20.887248Z",
                "execute_steps": ["dbt run"],
                "state": 1,
                "deferring_job_definition_id": None,
                "lifecycle_webhooks": False,
                "lifecycle_webhooks_url": None,
                "triggers": {
                    "github_webhook": False,
                    "git_provider_webhook": False,
                    "custom_branch_only": False,
                    "schedule": False,
                },
                "settings": {"threads": 4, "target_name": "default"},
            }
    return {
        "status": {
            "code": 200,
            "is_success": True,
            "user_message": "Success!",
            "developer_message": "",
        },
        "data": deep_merge_dicts(base_data, kwargs),
    }
Example #30
def test_memoization_k8s_executor(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    ephemeral_path = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    run_config = deep_merge_dicts(
        run_config,
        {
            "resources": {
                "io_manager": {
                    "config": {
                        "s3_prefix": ephemeral_path
                    }
                }
            }
        },
    )

    # wrap in try/finally to ensure that memoized results are always cleaned from the s3 bucket
    try:
        pipeline_name = "memoization_pipeline"

        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url_for_k8s_run_launcher,
                run_config=run_config,
                pipeline_name=pipeline_name,
                mode="k8s",
            )

            result = wait_for_job_and_get_raw_logs(
                job_name="dagster-run-%s" % run_id,
                namespace=helm_namespace_for_k8s_run_launcher,
            )

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
                result)

            run_ids.append(run_id)

        # We expect that first run should have to run the step, since it has not yet been
        # memoized.
        unmemoized_run_id = run_ids[0]
        events = dagster_instance_for_k8s_run_launcher.all_logs(
            unmemoized_run_id)
        assert len(_get_step_execution_events(events)) == 1

        # We expect that second run should not have to run the step, since it has been memoized.
        memoized_run_id = run_ids[1]
        events = dagster_instance_for_k8s_run_launcher.all_logs(
            memoized_run_id)
        assert len(_get_step_execution_events(events)) == 0
    finally:
        cleanup_memoized_results(define_memoization_pipeline(), "k8s",
                                 dagster_instance_for_k8s_run_launcher,
                                 run_config)