Ejemplo n.º 1
0
def test_resource_requirements_pass():
    """Run a pipeline whose system storage declares 'yup' as its only required resource.

    The storage init function asserts that 'yup' is reachable on
    ``init_context.resources`` while 'not_required' and an unknown name are
    not, and records that it ran.
    """
    tracker = {}

    @system_storage(required_resource_keys={'yup'})
    def storage_with_req(init_context):
        resources = init_context.resources
        assert hasattr(resources, 'yup')
        assert not hasattr(resources, 'not_required')
        assert not hasattr(resources, 'kjdkfjdkfje')
        tracker['called'] = True
        return create_mem_system_storage_data(init_context)

    mode = ModeDefinition(
        resource_defs={
            'yup': ResourceDefinition.none_resource(),
            'not_required': ResourceDefinition.none_resource(),
        },
        system_storage_defs=[storage_with_req],
    )

    @pipeline(mode_defs=[mode])
    def resource_req_pass_pipeline():
        pass

    storage_selection = {'storage': {'storage_with_req': None}}
    run_result = execute_pipeline(
        resource_req_pass_pipeline,
        environment_dict=storage_selection,
    )
    assert run_result.success

    # The storage init function must actually have been invoked during the run.
    assert tracker['called']
Ejemplo n.º 2
0
def test_resource_requirements_pass():
    """Run a pipeline whose system storage declares 'yup' as its only required resource.

    The storage init function asserts that 'yup' is reachable on
    ``init_context.resources`` while 'not_required' and an unknown name are
    not. The pipeline is executed once with ``{}`` and once with ``None`` as
    the storage config.
    """
    tracker = {}

    @system_storage(required_resource_keys={'yup'})
    def storage_with_req(init_context):
        resources = init_context.resources
        assert hasattr(resources, 'yup')
        assert not hasattr(resources, 'not_required')
        assert not hasattr(resources, 'kjdkfjdkfje')
        tracker['called'] = True
        return create_mem_system_storage_data(init_context)

    mode = ModeDefinition(
        resource_defs={
            'yup': ResourceDefinition.none_resource(),
            'not_required': ResourceDefinition.none_resource(),
        },
        system_storage_defs=[storage_with_req],
    )

    @pipeline(mode_defs=[mode])
    def resource_req_pass_pipeline():
        pass

    def run_with_storage_config(storage_config):
        # Execute the pipeline, selecting 'storage_with_req' with the given config value.
        return execute_pipeline(
            resource_req_pass_pipeline,
            environment_dict={'storage': {'storage_with_req': storage_config}},
        )

    assert run_with_storage_config({}).success

    assert tracker['called']

    # This also works with None because storage is a selector and you can indicate your
    # "selection" by the presence of the key
    assert run_with_storage_config(None).success
Ejemplo n.º 3
0
def test_resource_requirements_pass():
    """Run a pipeline whose system storage declares "yup" as its only required resource.

    The storage init function asserts that "yup" is reachable on
    ``init_context.resources`` while "not_required" and an unknown name are
    not. The pipeline is executed once with ``{}`` and once with ``None`` as
    the storage config.
    """
    tracker = {}

    @system_storage(required_resource_keys={"yup"})
    def storage_with_req(init_context):
        resources = init_context.resources
        assert hasattr(resources, "yup")
        assert not hasattr(resources, "not_required")
        assert not hasattr(resources, "kjdkfjdkfje")
        tracker["called"] = True
        return create_mem_system_storage_data(init_context)

    mode = ModeDefinition(
        resource_defs={
            "yup": ResourceDefinition.none_resource(),
            "not_required": ResourceDefinition.none_resource(),
        },
        system_storage_defs=[storage_with_req],
    )

    @pipeline(mode_defs=[mode])
    def resource_req_pass_pipeline():
        pass

    empty_config_result = execute_pipeline(
        resource_req_pass_pipeline,
        run_config={"storage": {"storage_with_req": {}}},
    )
    assert empty_config_result.success

    assert tracker["called"]

    # This also works with None because storage is a selector and you can indicate your
    # "selection" by the presence of the key
    none_config_result = execute_pipeline(
        resource_req_pass_pipeline,
        run_config={"storage": {"storage_with_req": None}},
    )
    assert none_config_result.success
def test_none_resource():
    """Execute a one-solid pipeline and check the "test_null" resource is ``None`` in the solid."""
    tracker = {}

    @solid(required_resource_keys={"test_null"})
    def solid_test_null(context):
        assert context.resources.test_null is None
        tracker["yup"] = True

    null_mode = ModeDefinition(
        resource_defs={"test_null": ResourceDefinition.none_resource()},
    )
    pipeline_def = PipelineDefinition(
        name="test_none_resource",
        solid_defs=[solid_test_null],
        mode_defs=[null_mode],
    )

    run_result = execute_pipeline(pipeline_def)

    assert run_result.success
    # The solid body must have run for the resource assertion to count.
    assert tracker["yup"]
Ejemplo n.º 5
0
def assert_assets_match_project(assets):
    """Assert that ``assets`` are the four expected assets and that they wire up as expected.

    For every asset this checks the op name, that the op has exactly one
    output, the string form of that output's hardcoded asset key, and the op
    tags. It then builds a job from the assets and compares the job's
    dependency mapping against the expected one.
    """
    upstream = "sort_by_calories"
    downstream_names = (
        "least_caloric",
        "sort_cold_cereals_by_calories",
        "sort_hot_cereals_by_calories",
    )

    assert len(assets) == 4
    by_name = {asset_def.op.name: asset_def for asset_def in assets}
    assert by_name.keys() == {upstream, *downstream_names}

    for op_name, asset_def in by_name.items():
        op = asset_def.op
        assert op_name == op.name
        output_defs = op.output_defs
        assert len(output_defs) == 1
        assert f'["{op_name}"]' == output_defs[0].hardcoded_asset_key.to_string()
        assert op.tags == {"kind": "dbt"}

    job = build_assets_job(
        "jarb",
        assets,
        resource_defs={"dbt": ResourceDefinition.none_resource()},
    )

    # Every downstream op depends only on the "result" output of the upstream op;
    # the upstream op itself has no dependencies.
    expected_dependencies = {upstream: {}}
    for op_name in downstream_names:
        expected_dependencies[op_name] = {
            upstream: DependencyDefinition(solid=upstream, output="result"),
        }
    assert job.dependencies == expected_dependencies
Ejemplo n.º 6
0
def test_resource_requirements_fail():
    """Defining a pipeline whose mode lacks the storage's required resource must raise.

    The mode only provides 'nope' while the system storage requires 'yup';
    the resulting error message is compared verbatim.
    """
    # Compared character-for-character with the raised message (note: there is
    # no space between "storage" and the storage name in the expected text).
    expected_message = (
        'Resource "yup" is required by system storagestorage_with_req, but '
        'is not provided by mode "default".'
    )

    @system_storage(required_resource_keys={'yup'})
    def storage_with_req(init_context):
        return create_mem_system_storage_data(init_context)

    def define_pipeline_without_yup():
        # Definition happens at decoration time, so calling this helper is what raises.
        @pipeline(mode_defs=[
            ModeDefinition(
                resource_defs={'nope': ResourceDefinition.none_resource()},
                system_storage_defs=[storage_with_req],
            )
        ])
        def _resource_req_pass_pipeline():
            pass

    with pytest.raises(DagsterInvalidDefinitionError) as exc_info:
        define_pipeline_without_yup()

    raised_message = str(exc_info.value)
    assert raised_message == expected_message
Ejemplo n.º 7
0
def test_resource_requirements_fail():
    """Defining a pipeline whose mode lacks the storage's required resource must raise.

    The mode only provides "nope" while the intermediate storage requires
    "yup"; the error is matched against a regular expression.
    """
    expected_pattern = r'"yup" is required by intermediate storage "storage_with_req"'

    @intermediate_storage(required_resource_keys={"yup"})
    def storage_with_req(init_context):
        return create_mem_system_intermediate_store(init_context)

    def define_pipeline_without_yup():
        # Definition happens at decoration time, so calling this helper is what raises.
        @pipeline(
            mode_defs=[
                ModeDefinition(
                    resource_defs={"nope": ResourceDefinition.none_resource()},
                    intermediate_storage_defs=[storage_with_req],
                )
            ]
        )
        def _resource_req_pass_pipeline():
            pass

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_pattern):
        define_pipeline_without_yup()
Ejemplo n.º 8
0
def test_none_resource():
    """Execute a one-solid pipeline and check the 'test_null' resource is ``None`` in the solid."""
    tracker = {}

    @solid(resources={'test_null'})
    def solid_test_null(context):
        assert context.resources.test_null is None
        tracker['yup'] = True

    null_mode = ModeDefinition(
        resources={'test_null': ResourceDefinition.none_resource()},
    )
    pipeline_def = PipelineDefinition(
        name='test_none_resource',
        solids=[solid_test_null],
        mode_definitions=[null_mode],
    )

    run_result = execute_pipeline(pipeline_def)

    assert run_result.success
    # The solid body must have run for the resource assertion to count.
    assert tracker['yup']
def test_resource_requirements_fail():
    """Defining a pipeline whose mode lacks the storage's required resource must raise.

    The mode only provides "nope" while the intermediate storage requires
    "yup"; the resulting error message is compared verbatim.
    """
    expected_message = (
        "Resource 'yup' is required by intermediate storage 'storage_with_req', but "
        "is not provided by mode 'default'."
    )

    @intermediate_storage(required_resource_keys={"yup"})
    def storage_with_req(init_context):
        return create_mem_system_intermediate_store(init_context)

    def define_pipeline_without_yup():
        # Definition happens at decoration time, so calling this helper is what raises.
        @pipeline(
            mode_defs=[
                ModeDefinition(
                    resource_defs={"nope": ResourceDefinition.none_resource()},
                    intermediate_storage_defs=[storage_with_req],
                )
            ]
        )
        def _resource_req_pass_pipeline():
            pass

    with pytest.raises(DagsterInvalidDefinitionError) as exc_info:
        define_pipeline_without_yup()

    raised_message = str(exc_info.value)
    assert raised_message == expected_message
Ejemplo n.º 10
0
def test_none_resource():
    """Execute a one-solid pipeline and check the 'test_null' resource is ``None`` in the solid."""
    tracker = {}

    @solid
    def solid_test_null(info):
        assert info.context.resources.test_null is None
        tracker['yup'] = True

    default_context = PipelineContextDefinition(
        resources={'test_null': ResourceDefinition.none_resource()},
    )
    pipeline_def = PipelineDefinition(
        name='test_none_resource',
        solids=[solid_test_null],
        context_definitions={'default': default_context},
    )

    run_result = execute_pipeline(pipeline_def)

    assert run_result.success
    # The solid body must have run for the resource assertion to count.
    assert tracker['yup']
Ejemplo n.º 11
0
def test_download():
    """Build a job from the ``hacker_news_assets.assets`` package and run one partition in process.

    Parquet output is pointed at a temporary directory that is removed when
    the test finishes.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        test_resources = {
            "io_manager": fs_io_manager,
            "partition_start": ResourceDefinition.string_resource(),
            "partition_end": ResourceDefinition.string_resource(),
            "parquet_io_manager": local_partitioned_parquet_io_manager.configured(
                {"base_path": temp_dir}
            ),
            "warehouse_io_manager": mem_io_manager,
            "pyspark": pyspark_resource,
            "hn_client": hn_snapshot_client,
            "dbt": ResourceDefinition.none_resource(),
        }

        asset_group = AssetGroup.from_package_name(
            "hacker_news_assets.assets",
            resource_defs=test_resources,
        )
        test_job = asset_group.build_job(
            "test_job",
            selection=["*comments", "*stories"],
        )

        run_result = test_job.execute_in_process(partition_key="2020-12-30-00:00")

        assert run_result.success
Ejemplo n.º 12
0
    raw_events = make_raw_events()
    clean_events(raw_events)


# Schedule targeting the event_tables job with the cron string "0 0 * * *"
# (minute 0, hour 0, every day). The evaluation function ignores its argument
# and returns an empty dict.
@schedule(job=event_tables, cron_schedule="0 0 * * *")
def event_tables_schedule(_):
    return {}


# Graph containing a single invocation of the "make_event_reports" solid, which
# declares "mode" as a required resource key.
@graph
def event_reports():
    make_solid("make_event_reports", required_resource_keys={"mode"})()


# Sensor attached to a job built from the event_reports graph, with a
# none_resource supplied for the "mode" key. The evaluation body is empty.
@sensor(job=event_reports.to_job(resource_defs={"mode": ResourceDefinition.none_resource()}))
def event_reports_sensor():
    pass


# Job built from the event_reports graph with a none_resource supplied for "mode".
event_reports_dev = event_reports.to_job(resource_defs={"mode": ResourceDefinition.none_resource()})


@graph
def crm_ingest():
    """A graph with multiple production jobs"""
    # Build both solids first, then invoke them in the same order; each one
    # declares "crm" as a required resource key.
    crm_solids = [
        make_solid(solid_name, required_resource_keys={"crm"})
        for solid_name in ("ingest_users", "ingest_interactions")
    ]
    for crm_solid in crm_solids:
        crm_solid()
Ejemplo n.º 13
0
    """
    Trains a collaborative filtering model that can recommend HN stories to users based on what
    stories they've commented on in the past.
    """
    comment_stories = build_comment_stories()
    user_story_matrix = build_user_story_matrix(comment_stories)
    recommender_model = build_recommender_model(user_story_matrix)
    model_perf_notebook(recommender_model)
    build_component_top_stories(recommender_model, user_story_matrix)
    build_user_top_recommended_stories(recommender_model, user_story_matrix)


# Job for the story_recommender graph: RESOURCES_PROD with "partition_bounds"
# set (or overridden) to a none_resource.
story_recommender_prod_job = story_recommender.to_job(
    resource_defs={
        **RESOURCES_PROD,
        "partition_bounds": ResourceDefinition.none_resource(),
    },
)

# Job for the story_recommender graph: RESOURCES_STAGING with "partition_bounds"
# set (or overridden) to a none_resource.
story_recommender_staging_job = story_recommender.to_job(
    resource_defs={
        **RESOURCES_STAGING,
        "partition_bounds": ResourceDefinition.none_resource(),
    },
)

story_recommender_local_job = story_recommender.to_job(
    resource_defs={
        **RESOURCES_LOCAL,
        **{
Ejemplo n.º 14
0

# Schedule targeting the event_tables job with the cron string "0 0 * * *"
# (minute 0, hour 0, every day). The evaluation function ignores its argument
# and returns an empty dict.
@schedule(job=event_tables, cron_schedule="0 0 * * *")
def event_tables_schedule(_):
    return {}


# Graph containing a single invocation of the "make_event_reports" solid, which
# declares "mode" as a required resource key.
@graph
def event_reports():
    make_solid("make_event_reports", required_resource_keys={"mode"})()


# Sensor attached to a job built from the event_reports graph, with a
# none_resource supplied for the "mode" key. The evaluation body is empty.
@sensor(job=event_reports.to_job(
    resource_defs={"mode": ResourceDefinition.none_resource()}))
def event_reports_sensor():
    pass


# Job built from the event_reports graph with a none_resource supplied for "mode".
event_reports_dev = event_reports.to_job(
    resource_defs={"mode": ResourceDefinition.none_resource()})


@graph
def crm_ingest():
    """A graph with multiple production jobs"""
    # Both solids declare "crm" as a required resource key.
    ingest_users = make_solid("ingest_users", required_resource_keys={"crm"})
    ingest_interactions = make_solid("ingest_interactions",
                                     required_resource_keys={"crm"})
    # NOTE(review): in the visible body the two solids are constructed but never
    # invoked — confirm whether the listing was truncated here.
Ejemplo n.º 15
0
            "dbt":
            dbt_prod_resource,
            # this is an alternative pattern to the configured() api. If you know that you won't want to
            # further configure this resource per pipeline run, this can be a bit more convenient than
            # defining an @resource with a config schema.
            "dbt_assets":
            ResourceDefinition.hardcoded_resource(
                SnowflakeQueryDbtAssetResource(
                    {
                        **{
                            "database": "DEMO_DB"
                        },
                        **SHARED_SNOWFLAKE_CONF
                    }, "hackernews")),
            "partition_bounds":
            ResourceDefinition.none_resource(),
        },
    })

dbt_staging_job = dbt_metrics.to_job(
    resource_defs={
        **RESOURCES_STAGING,
        **{
            "dbt":
            dbt_staging_resource,
            "dbt_assets":
            ResourceDefinition.hardcoded_resource(
                SnowflakeQueryDbtAssetResource(
                    {
                        **{
                            "database": "DEMO_DB_STAGING"