Example #1
def test_bollinger_analysis():
    bollinger_sda = AssetGroup(
        assets=[sp500_anomalous_events, sp500_bollinger_bands, sp500_prices],
        resource_defs={"io_manager": local_csv_io_manager},
    )
    result = bollinger_sda.build_job("test_job").execute_in_process()
    assert result.asset_materializations_for_node
Example #2
def test_asset_io_manager(gcs_bucket):
    @asset
    def upstream():
        return 2

    @asset
    def downstream(upstream):
        return 1 + upstream

    @asset(partitions_def=StaticPartitionsDefinition(["apple", "orange"]))
    def partitioned():
        return 8

    fake_gcs_client = FakeGCSClient()
    asset_group = AssetGroup(
        [upstream, downstream, partitioned],
        resource_defs={
            "io_manager":
            gcs_pickle_asset_io_manager.configured({
                "gcs_bucket": gcs_bucket,
                "gcs_prefix": "assets"
            }),
            "gcs":
            ResourceDefinition.hardcoded_resource(fake_gcs_client),
        },
    )
    asset_job = asset_group.build_job(name="my_asset_job")

    result = asset_job.execute_in_process(partition_key="apple")
    assert result.success
    assert fake_gcs_client.get_all_blob_paths() == {
        f"{gcs_bucket}/assets/upstream",
        f"{gcs_bucket}/assets/downstream",
        f"{gcs_bucket}/assets/partitioned/apple",
    }
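
Outside the test, the same IO manager can be pointed at a real GCS client. A minimal sketch, assuming dagster-gcp exposes gcs_resource alongside the pickle IO manager and using a placeholder bucket name:

from dagster_gcp.gcs import gcs_pickle_asset_io_manager, gcs_resource

# non-test wiring; "my-bucket" is a placeholder
asset_group = AssetGroup(
    [upstream, downstream, partitioned],
    resource_defs={
        "io_manager": gcs_pickle_asset_io_manager.configured(
            {"gcs_bucket": "my-bucket", "gcs_prefix": "assets"}
        ),
        "gcs": gcs_resource,
    },
)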
Example #3
def foo():
    return [
        AssetGroup(
            dbt_assets_a, resource_defs={"dbt": dbt_cli_resource}
        ).build_job("a"),
        AssetGroup(
            dbt_assets_b, resource_defs={"dbt": dbt_cli_resource}
        ).build_job("b"),
    ]
Example #4
def test_cereal_asset_group():
    group = AssetGroup([
        nabisco_cereals,
        cereals,
        cereal_protein_fractions,
        highest_protein_nabisco_cereal,
    ])

    result = group.materialize()
    assert result.success
    assert result.output_for_node(
        "highest_protein_nabisco_cereal") == "100% Bran"
Example #5
def make_activity_stats_job(asset_group: AssetGroup) -> JobDefinition:
    return asset_group.build_job(
        name="activity_stats",
        selection=[
            "comment_daily_stats",
            "story_daily_stats",
            "activity_daily_stats",
            "activity_forecast",
        ],
    )
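
Factories like this are typically called from a repository so the group and its jobs load together. A sketch with assumed names:

from dagster import repository

@repository
def activity_repo():
    # activity_asset_group is a hypothetical AssetGroup defined elsewhere
    return [activity_asset_group, make_activity_stats_job(activity_asset_group)]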
Example #6
def test_asset_io_manager(storage_account, file_system, credential):
    @asset
    def upstream():
        return 2

    @asset
    def downstream(upstream):
        assert upstream == 2
        return 1 + upstream

    asset_group = AssetGroup(
        [upstream, downstream],
        resource_defs={
            "io_manager": adls2_pickle_asset_io_manager,
            "adls2": adls2_resource
        },
    )
    asset_job = asset_group.build_job(name="my_asset_job")

    run_config = {
        "resources": {
            "io_manager": {
                "config": {
                    "adls2_file_system": file_system
                }
            },
            "adls2": {
                "config": {
                    "storage_account": storage_account,
                    "credential": {
                        "key": credential
                    }
                }
            },
        }
    }

    result = asset_job.execute_in_process(run_config=run_config)
    assert result.success
Example #7
def make_download_job(asset_group: AssetGroup) -> JobDefinition:
    return asset_group.build_job(
        name="hacker_news_api_download",
        selection=["*comments", "*stories"],
        tags={
            "dagster-k8s/config": {
                "container_config": {
                    "resources": {
                        "requests": {"cpu": "500m", "memory": "2Gi"},
                    }
                },
            }
        },
    )
Example #8
def test_download():
    with tempfile.TemporaryDirectory() as temp_dir:
        test_job = AssetGroup.from_package_name(
            "hacker_news_assets.assets",
            resource_defs={
                "io_manager": fs_io_manager,
                "partition_start": ResourceDefinition.string_resource(),
                "partition_end": ResourceDefinition.string_resource(),
                "parquet_io_manager": local_partitioned_parquet_io_manager.configured(
                    {"base_path": temp_dir}
                ),
                "warehouse_io_manager": mem_io_manager,
                "pyspark": pyspark_resource,
                "hn_client": hn_snapshot_client,
                "dbt": ResourceDefinition.none_resource(),
            },
        ).build_job(
            "test_job",
            selection=["*comments", "*stories"],
        )

        result = test_job.execute_in_process(partition_key="2020-12-30-00:00")

        assert result.success
Example #9
def test_serial_asset_graph():
    result = AssetGroup.from_modules([serial_asset_graph]).materialize()
    assert result.success
Example #10
class DummyIOManager(IOManager):
    def handle_output(self, context, obj):
        assert context

    def load_input(self, context):
        assert context
        return DataFrame()


@asset
def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame:
    """Computes the temperature high for each day"""
    assert sfo_q2_weather_sample
    time.sleep(3)
    return DataFrame()


@asset
def hottest_dates(daily_temperature_highs: DataFrame) -> DataFrame:
    """Computes the 10 hottest dates"""
    assert daily_temperature_highs
    time.sleep(3)
    return DataFrame()


software_defined_assets = AssetGroup(
    assets=[daily_temperature_highs, hottest_dates],
    source_assets=[sfo_q2_weather_sample],
    resource_defs={
        "io_manager":
        IOManagerDefinition.hardcoded_io_manager(DummyIOManager())
    },
)
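
sfo_q2_weather_sample is referenced but not shown. A minimal stand-in as a SourceAsset, assuming its key matches the parameter name:

from dagster import AssetKey, SourceAsset

sfo_q2_weather_sample = SourceAsset(key=AssetKey("sfo_q2_weather_sample"))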
Example #11
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetGroup, asset, fs_asset_io_manager
from dagster_aws.s3 import s3_pickle_asset_io_manager, s3_resource


@asset
def upstream_asset():
    return [1, 2, 3]


@asset
def downstream_asset(upstream_asset):
    return upstream_asset + [4]


prod_asset_group = AssetGroup(
    [upstream_asset, downstream_asset],
    resource_defs={"io_manager": s3_pickle_asset_io_manager, "s3": s3_resource},
)

local_asset_group = AssetGroup(
    [upstream_asset, downstream_asset], resource_defs={"io_manager": fs_asset_io_manager}
)

# end_marker
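
One way to choose between the two groups at load time is an environment switch. A sketch; the DAGSTER_DEPLOYMENT variable name is an assumption:

import os

# hypothetical deployment toggle
asset_group = (
    prod_asset_group if os.getenv("DAGSTER_DEPLOYMENT") == "prod" else local_asset_group
)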
Example #12
def bollinger():
    return [
        AssetGroup.from_package_name(__name__, resource_defs={"io_manager": local_csv_io_manager})
    ]
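
This function body has the shape of a repository definition. A sketch with the assumed decorator restored:

from dagster import repository

@repository
def bollinger():
    return [
        AssetGroup.from_package_name(__name__, resource_defs={"io_manager": local_csv_io_manager})
    ]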
Example #13
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetGroup, AssetKey, SourceAsset, asset

my_source_asset = SourceAsset(key=AssetKey("my_source_asset"))


@asset
def my_derived_asset(my_source_asset):
    return my_source_asset + [4]


asset_group = AssetGroup(assets=[my_derived_asset],
                         source_assets=[my_source_asset])

# end_marker
Example #14
@asset(partitions_def=daily_partitions_def)
def downstream_daily_partitioned_asset(upstream_daily_partitioned_asset):
    assert upstream_daily_partitioned_asset is None


@asset(
    metadata={"owner": "*****@*****.**"},
    partitions_def=HourlyPartitionsDefinition(
        start_date=datetime(2022, 3, 12, 0, 0)),
)
def hourly_partitioned_asset():
    pass


@asset(
    metadata={
        "owner": "*****@*****.**",
        "text_metadata": "Text-based metadata about this asset",
        "path": MetadataValue.path("/unpartitioned/asset"),
        "dashboard_url": MetadataValue.url("http://mycoolsite.com/url_for_my_asset"),
    },
)
def unpartitioned_asset():
    pass


partitioned_asset_group = AssetGroup.from_current_module()
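
daily_partitions_def and the upstream asset are cut off above. A sketch of plausible definitions; the start date is an assumption mirrored from the hourly asset:

from dagster import DailyPartitionsDefinition

daily_partitions_def = DailyPartitionsDefinition(start_date="2022-03-12")

@asset(partitions_def=daily_partitions_def)
def upstream_daily_partitioned_asset():
    pass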
Example #15
@asset(compute_kind="python", io_manager_key="pandas_io_manager")
def order_forecast_model(daily_order_summary: pd.DataFrame) -> Tuple[float, float]:
    """Model parameters that best fit the observed data"""
    df = daily_order_summary
    return tuple(
        optimize.curve_fit(f=model_func,
                           xdata=df.order_date.astype(np.int64),
                           ydata=df.num_orders,
                           p0=[10, 100])[0])


@asset(compute_kind="python", io_manager_key="pandas_io_manager")
def predicted_orders(
        daily_order_summary: pd.DataFrame,
        order_forecast_model: Tuple[float, float]) -> pd.DataFrame:
    """Predicted orders for the next 30 days based on the fit paramters"""
    a, b = order_forecast_model
    start_date = daily_order_summary.order_date.max()
    future_dates = pd.date_range(start=start_date,
                                 end=start_date + pd.DateOffset(days=30))
    predicted_data = model_func(x=future_dates.astype(np.int64), a=a, b=b)
    return pd.DataFrame({
        "order_date": future_dates,
        "num_orders": predicted_data
    })


analytics_assets = AssetGroup(
    airbyte_assets + dbt_assets + [order_forecast_model, predicted_orders],
    resource_defs={
        "airbyte": airbyte_resource.configured(AIRBYTE_CONFIG),
        "dbt": dbt_cli_resource.configured(DBT_CONFIG),
        "pandas_io_manager": pandas_io_manager.configured(PANDAS_IO_CONFIG),
    },
).build_job("Assets")
Example #16
from hacker_news_assets.resources import RESOURCES_LOCAL, RESOURCES_PROD, RESOURCES_STAGING

from dagster import AssetGroup, in_process_executor

prod_assets = AssetGroup.from_package_name(__name__, resource_defs=RESOURCES_PROD)
staging_assets = AssetGroup.from_package_name(__name__, resource_defs=RESOURCES_STAGING)
local_assets = AssetGroup.from_package_name(
    __name__, resource_defs=RESOURCES_LOCAL, executor_def=in_process_executor
)
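
Each RESOURCES_* constant is a dict mapping resource keys to definitions. A hypothetical shape for the local variant, with keys borrowed from the test in Example #8:

from dagster import ResourceDefinition, fs_io_manager

RESOURCES_LOCAL = {
    "io_manager": fs_io_manager,
    "hn_client": ResourceDefinition.none_resource(),  # stub standing in for the real client
}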
Example #17
"""isort:skip_file"""

# start_asset_marker
import csv
import requests
from dagster import asset


@asset
def cereals():
    response = requests.get("https://docs.dagster.io/assets/cereal.csv")
    lines = response.text.split("\n")
    cereal_rows = [row for row in csv.DictReader(lines)]

    return cereal_rows


# end_asset_marker

# start_materialize_marker
from dagster import AssetGroup

if __name__ == "__main__":
    AssetGroup([cereals]).materialize()

# end_materialize_marker
Example #18
from dagster import AssetGroup

asset_group = AssetGroup.from_package_name(
    "docs_snippets.concepts.assets.package_with_assets")
Example #19
def test_cereal():
    result = AssetGroup.from_modules([cereal]).materialize()
    assert result.success
Example #20
def my_repo():
    return [AssetGroup(assets=[], source_assets=[foo, bar])]
Example #21
class LocalFileSystemIOManager(IOManager):
    def _get_fs_path(self, asset_key: AssetKey) -> str:
        rpath = os.path.join(*asset_key.path) + ".csv"
        return os.path.abspath(rpath)

    def handle_output(self, context, obj: DataFrame):
        """This saves the dataframe as a CSV."""
        fpath = self._get_fs_path(context.asset_key)
        obj.to_csv(fpath)

    def load_input(self, context):
        """This reads a dataframe from a CSV."""
        fpath = self._get_fs_path(context.asset_key)
        return pd.read_csv(fpath)


# io_manager_end

# asset_group_start
# imports the module called "assets" from the package containing the current module
# the "assets" module contains the asset definitions
from . import assets

weather_assets = AssetGroup.from_modules(
    modules=[assets],
    resource_defs={
        "io_manager":
        IOManagerDefinition.hardcoded_io_manager(LocalFileSystemIOManager())
    },
)
# asset_group_end
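
For reference, materializing the group round-trips every asset through the CSV files written by the IO manager above:

result = weather_assets.materialize()
assert result.success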
Example #22
def make_story_recommender_job(asset_group: AssetGroup) -> JobDefinition:
    return asset_group.build_job(
        name="story_recommender",
        selection=["comment_stories*"],
    )
Example #23
# pylint: disable=redefined-outer-name
from dagster import AssetGroup, AssetIn, asset

namespace1 = ["s3", "superdomain_1", "subdomain_1", "subsubdomain_1"]


@asset(namespace=namespace1)
def asset1():
    pass


@asset(
    namespace=["s3", "superdomain_2", "subdomain_2", "subsubdomain_2"],
    ins={"asset1": AssetIn(namespace=namespace1)},
)
def asset2(asset1):
    assert asset1 is None


long_asset_keys_group = AssetGroup([asset1, asset2])
Example #24
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetGroup, asset, fs_asset_io_manager
from dagster_aws.s3 import s3_pickle_asset_io_manager, s3_resource


@asset(io_manager_key="s3_io_manager")
def upstream_asset():
    return [1, 2, 3]


@asset(io_manager_key="fs_io_manager")
def downstream_asset(upstream_asset):
    return upstream_asset + [4]


asset_group = AssetGroup(
    [upstream_asset, downstream_asset],
    resource_defs={
        "s3_io_manager": s3_pickle_asset_io_manager,
        "s3": s3_resource,
        "fs_io_manager": fs_asset_io_manager,
    },
)

# end_marker
Example #25
# pylint: disable=redefined-outer-name
from dagster import AssetGroup, AssetKey, SourceAsset, asset, repository


@asset
def upstream_asset():
    return 5


upstream_asset_group = AssetGroup([upstream_asset])


@repository
def upstream_assets_repository():
    return [upstream_asset_group]


source_assets = [SourceAsset(AssetKey("upstream_asset"))]


@asset
def downstream_asset1(upstream_asset):
    assert upstream_asset


@asset
def downstream_asset2(upstream_asset):
    assert upstream_asset


downstream_asset_group1 = AssetGroup(assets=[downstream_asset1], source_assets=source_assets)
Example #26
import random
from typing import Sequence

from dagster import AssetGroup, AssetKey, asset

N_ASSETS = 1000


def generate_big_honkin_assets() -> Sequence:
    random.seed(5438790)
    assets = []

    for i in range(N_ASSETS):
        non_argument_deps = {
            AssetKey(f"asset_{j}")
            for j in random.sample(range(i), min(i, random.randint(0, 3)))
        }

        @asset(name=f"asset_{i}", non_argument_deps=non_argument_deps)
        def some_asset():
            pass

        assets.append(some_asset)

    return assets


big_honkin_asset_group = AssetGroup(generate_big_honkin_assets())
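
A group this size is usually exercised through a job. A minimal sketch; the job name is arbitrary:

# builds a job that materializes all 1,000 generated assets in one run
big_honkin_job = big_honkin_asset_group.build_job("all_the_assets")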
Example #27
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetGroup, asset


@asset
def upstream():
    return [1, 2, 3]


@asset
def downstream_1(upstream):
    return upstream + [4]


@asset
def downstream_2(upstream):
    return len(upstream)


asset_group = AssetGroup([upstream, downstream_1, downstream_2])

all_assets = asset_group.build_job(name="my_asset_job")

downstream_assets = asset_group.build_job(
    name="my_asset_job", selection=["upstream", "downstream_1"])

upstream_and_downstream_1 = asset_group.build_job(name="my_asset_job",
                                                  selection="*downstream_1")
# end_marker
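
For contrast with the upstream selector above, the same wildcard works in the downstream direction. A sketch:

# "downstream_1*" selects downstream_1 plus everything downstream of it
downstream_of_1 = asset_group.build_job(name="my_asset_job", selection="downstream_1*")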