Ejemplo n.º 1
0
def asset_metadata(_context, model_info):
    """Build display metadata for the table described by ``model_info``.

    A copy of ``SHARED_SNOWFLAKE_CONF`` is made with its ``"schema"`` entry set
    to ``model_info["schema"]``; the shared config itself is not mutated. The
    connection returned by ``connect_snowflake`` is then used to fetch a
    five-row sample and the total row count of ``model_info["name"]``.

    Returns:
        A dict with two entries: ``"Data sample"`` (the sample rendered as a
        markdown table and wrapped with ``EventMetadata.md``) and ``"Rows"``
        (the first column of the ``COUNT(*)`` result row).
    """
    snowflake_config = dict(SHARED_SNOWFLAKE_CONF, schema=model_info["schema"])

    with connect_snowflake(config=snowflake_config) as connection:
        table_name = model_info["name"]
        sample_query = f"SELECT * FROM {table_name} LIMIT 5"
        count_query = f"SELECT COUNT(*) FROM {table_name}"

        sample_df = pd.read_sql(sample_query, con=connection)
        count_row = connection.execute(count_query).fetchone()

    # Both results are fully fetched, so they remain usable after the
    # connection context has exited.
    sample_markdown = EventMetadata.md(sample_df.to_markdown())
    return {"Data sample": sample_markdown, "Rows": count_row[0]}
Ejemplo n.º 2
0
def many_table_materializations(_context):
    """Yield one ``AssetMaterialization`` for every entry in ``raw_tables``.

    Every materialization uses the asset key ``"table_info"`` and carries the
    same set of example metadata entries: the table name, a path, a nested
    dict, a URL, a markdown blurb read from ``MARKDOWN_EXAMPLE``, a very large
    integer, and a NaN float.

    Yields:
        AssetMaterialization: one event per table in ``raw_tables``.
    """
    # Read the markdown blurb and close the file *before* the first yield.
    # Yielding from inside the ``with`` block would keep the file handle open
    # for as long as the generator is suspended, and until garbage collection
    # if the consumer abandons the generator early.
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()

    for table in raw_tables:
        yield AssetMaterialization(
            asset_key="table_info",
            metadata={
                "table_name": table,
                "table_path": EventMetadata.path(f"/path/to/{table}"),
                "table_data": {"name": table},
                "table_name_big": EventMetadata.url(f"https://bigty.pe/{table}"),
                "table_blurb": EventMetadata.md(md_str),
                "big_int": 29119888133298982934829348,
                "float_nan": float("nan"),
            },
        )
Ejemplo n.º 3
0
def build_component_top_stories(model: TruncatedSVD,
                                user_story_matrix: IndexedCooMatrix,
                                story_titles: DataFrame):
    """
    Collect the titles of the highest-weighted stories for every component of the model.

    For each row of ``model.components_`` the ten largest entries are taken (largest first),
    mapped to story ids through ``user_story_matrix.col_index``, and looked up in
    ``story_titles`` (indexed by its ``"id"`` column) to obtain the ``"title"`` values.

    Yields a single ``Output`` wrapping a DataFrame with one row per (component, title) pair in
    the columns ``"component_index"`` and ``"title"``, with a markdown rendering of that frame
    attached as metadata.
    """
    top_k = 10
    titles_by_id = story_titles.set_index("id")

    component_indices = []
    collected_titles = []

    for component_idx, weights in enumerate(model.components_):
        # argsort is ascending: keep the last ``top_k`` positions and reverse them so the
        # strongest story comes first.
        ranked_positions = weights.argsort()[-top_k:][::-1]
        story_ids = user_story_matrix.col_index[ranked_positions]
        titles_for_component = list(titles_by_id.loc[story_ids]["title"])

        component_indices.extend([component_idx] * len(titles_for_component))
        collected_titles.extend(titles_for_component)

    component_top_stories = DataFrame({
        "component_index": Series(component_indices),
        "title": Series(collected_titles),
    })

    markdown_summary = EventMetadata.md(
        top_components_to_markdown(component_top_stories))
    yield Output(
        component_top_stories,
        metadata={"Top component top stories": markdown_summary},
    )
Ejemplo n.º 4
0
def metadata_for_actions(df):
    """Summarize ``df`` as a metadata dict.

    Returns:
        A dict holding the minimum and maximum of the ``"score"`` column (each
        converted with ``int``) under ``"min_score"`` / ``"max_score"``, and
        the first five rows rendered as markdown and wrapped with
        ``EventMetadata.md`` under ``"sample rows"``.
    """
    scores = df["score"]
    lowest = int(scores.min())
    highest = int(scores.max())
    preview = EventMetadata.md(df[:5].to_markdown())
    return {"min_score": lowest, "max_score": highest, "sample rows": preview}
Ejemplo n.º 5
0
def daily_top_action(_, df1, df2):
    """Return the single highest-``"score"`` row across ``df1`` and ``df2``.

    The two frames are concatenated and the row with the largest ``"score"``
    is selected. The result is wrapped in an ``Output`` whose ``"data"``
    metadata entry is the markdown rendering of that one-row frame.
    """
    combined = pd.concat([df1, df2])
    top_row = combined.nlargest(1, "score")
    rendered = EventMetadata.md(top_row.to_markdown())
    return Output(top_row, metadata={"data": rendered})
Ejemplo n.º 6
0
 def _best_n_actions(_, df):
     """Return the ``n`` rows of ``df`` with the largest ``"score"`` values.

     ``n`` is not a parameter; it is resolved from an enclosing scope that is
     not visible in this excerpt. The selected rows are wrapped in an
     ``Output`` whose ``"data"`` metadata entry is their markdown rendering.
     """
     top_rows = df.nlargest(n, "score")
     markdown_table = EventMetadata.md(top_rows.to_markdown())
     return Output(top_rows, metadata={"data": markdown_table})