Esempio n. 1
0
    def handle_output(self, context, obj: pd.DataFrame):
        """Persist ``obj`` as a CSV file and report metadata about the write.

        The destination is resolved from ``context.asset_key`` through
        ``self._get_fs_path``; missing parent directories are created. A
        ``<path>.version`` sidecar file is written next to the CSV holding
        ``context.version``, or the literal text ``"None"`` when that value
        is falsy.

        Yields:
            Metadata entries, in order: the row count, the output path, a
            markdown rendering of the first five rows, the resolved version,
            and the schema returned by ``self.get_schema`` for
            ``context.dagster_type``.
        """
        csv_path = self._get_fs_path(context.asset_key)
        parent_dir = os.path.dirname(csv_path)
        os.makedirs(parent_dir, exist_ok=True)
        obj.to_csv(csv_path)

        # Keep the version in a sidecar file that lives beside the data.
        version_path = csv_path + ".version"
        with open(version_path, "w") as version_file:
            version_file.write(context.version or "None")

        # Each value is computed right before its entry is yielded, so the
        # work interleaves with consumption exactly as a plain sequence of
        # yields would.
        row_count = obj.shape[0]
        yield MetadataEntry.int(row_count, "Rows")
        yield MetadataEntry.path(csv_path, "Path")

        preview_md = obj.head(5).to_markdown()
        yield MetadataEntry.md(preview_md, "Sample")
        yield MetadataEntry.text(context.version, "Resolved version")

        table_schema = self.get_schema(context.dagster_type)
        yield MetadataEntry.table_schema(table_schema, "Schema")
Esempio n. 2
0
 def materialize(_):
     # Yields one AssetMaterialization for the "all_types" asset whose
     # metadata list covers many entry kinds (text, url, path, json, python
     # artifacts, numbers, run/asset references, table, table schema), and
     # then yields an Output wrapping None. The single argument is unused.

     # Tabular entry: two records sharing the same "foo"/"bar" fields.
     table_entry = MetadataEntry.table(
         label="table",
         records=[TableRecord(foo=1, bar=2), TableRecord(foo=3, bar=4)],
     )

     # Schema entry: a unique integer column, a string column, and one
     # free-form table-level constraint.
     foo_column = TableColumn(
         name="foo",
         type="integer",
         constraints=TableColumnConstraints(unique=True),
     )
     bar_column = TableColumn(name="bar", type="string")
     schema_entry = MetadataEntry.table_schema(
         label="table_schema",
         schema=TableSchema(
             columns=[foo_column, bar_column],
             constraints=TableConstraints(other=["some constraint"]),
         ),
     )

     entries = [
         MetadataEntry.text("text is cool", "text"),
         MetadataEntry.url("https://bigty.pe/neato", "url"),
         MetadataEntry.fspath("/tmp/awesome", "path"),
         MetadataEntry.json({"is_dope": True}, "json"),
         MetadataEntry.python_artifact(MetadataEntry, "python class"),
         MetadataEntry.python_artifact(file_relative_path, "python function"),
         MetadataEntry.float(1.2, "float"),
         MetadataEntry.int(1, "int"),
         MetadataEntry.float(float("nan"), "float NaN"),
         MetadataEntry.int(LONG_INT, "long int"),
         MetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
         MetadataEntry.asset(AssetKey("my_asset"), "my asset"),
         table_entry,
         schema_entry,
     ]

     yield AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)