コード例 #1
0
ファイル: comment_stories.py プロジェクト: trevenrawr/dagster
from pandas import DataFrame, Series

from dagster import AssetIn, asset


@asset(
    ins={
        "stories": AssetIn(metadata={"columns": ["id"]}),
        "comments": AssetIn(metadata={"columns": ["id", "user_id", "parent"]}),
    },
    io_manager_key="warehouse_io_manager",
)
def comment_stories(stories: DataFrame, comments: DataFrame) -> DataFrame:
    """
    Comments linked to their root stories.

    Owners: [email protected], [email protected]
    """
    comments.rename(columns={
        "user_id": "commenter_id",
        "id": "comment_id"
    },
                    inplace=True)
    comments = comments.set_index("comment_id")[["commenter_id", "parent"]]
    stories = stories.set_index("id")[[]]

    full_comment_stories = DataFrame(
        index=Series(name="comment_id", dtype="int"),
        data={
            "story_id": Series(dtype="int"),
            "commenter_id": Series(dtype="object")
コード例 #2
0
    svd = TruncatedSVD(n_components=n_components)
    svd.fit(user_story_matrix.matrix)

    total_explained_variance = svd.explained_variance_ratio_.sum()

    yield Output(
        svd,
        metadata={
            "Total explained variance ratio": total_explained_variance,
            "Number of components": n_components,
        },
    )


@asset(
    ins={"stories": AssetIn(metadata={"columns": ["id", "title"]})},
    io_manager_key="warehouse_io_manager",
)
def component_top_stories(recommender_model: TruncatedSVD,
                          user_story_matrix: IndexedCooMatrix,
                          stories: DataFrame):
    """
    For each component in the collaborative filtering model, the titles of the top stories
    it's associated with.
    """
    n_stories = 10

    components_column = []
    titles_column = []

    story_titles = stories.set_index("id")
コード例 #3
0
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetIn, asset


@asset(namespace=["one", "two", "three"])
def upstream_asset():
    """Return the list ``[1, 2, 3]``.

    The asset is declared with the namespace ``["one", "two", "three"]``.
    """
    numbers = [1, 2, 3]
    return numbers


@asset(ins={"upstream_asset": AssetIn(namespace=["one", "two", "three"])})
def downstream_asset(upstream_asset):
    """Return ``upstream_asset`` with ``[4]`` concatenated onto its end.

    The ``upstream_asset`` input is declared through ``AssetIn`` with the
    namespace ``["one", "two", "three"]``, matching the namespace given to
    the upstream asset's decorator.
    """
    tail = [4]
    return upstream_asset + tail


# end_marker
コード例 #4
0
from dagster import AssetIn, asset


@asset
def upstream_asset():
    """Return the list ``[1, 2, 3]``."""
    numbers = [1, 2, 3]
    return numbers


@asset(ins={"upstream": AssetIn("upstream_asset")})
def downstream_asset(upstream):
    """Return ``upstream`` with ``[4]`` concatenated onto its end.

    The ``upstream`` parameter is declared as ``AssetIn("upstream_asset")``;
    presumably this ties the differently named argument to the asset called
    ``upstream_asset`` -- confirm against the dagster version in use.
    """
    tail = [4]
    return upstream + tail
コード例 #5
0
# pylint: disable=redefined-outer-name
from dagster import AssetGroup, AssetIn, asset

# Namespace path shared by asset1's decorator and by the AssetIn that
# asset2 uses for its ``asset1`` input.
namespace1 = [
    "s3",
    "superdomain_1",
    "subdomain_1",
    "subsubdomain_1",
]


@asset(namespace=namespace1)
def asset1():
    """Do no work and return ``None``; declared with the ``namespace1`` namespace."""
    return None


@asset(
    ins={"asset1": AssetIn(namespace=namespace1)},
    namespace=["s3", "superdomain_2", "subdomain_2", "subsubdomain_2"],
)
def asset2(asset1):
    """Assert that the value received for the ``asset1`` input is ``None``.

    The input is declared through ``AssetIn`` with the ``namespace1``
    namespace; this asset has its own, separate namespace. Nothing is
    returned.
    """
    assert asset1 is None


# Bundle asset1 and asset2 into a single AssetGroup.
long_asset_keys_group = AssetGroup([asset1, asset2])