Beispiel #1
0
def test_complex_table_schema():
    """A schema with constrained columns and table-level constraints constructs successfully."""
    # Column with a non-builtin type string and nullable/unique constraints.
    foo_column = TableColumn(
        name="foo",
        type="customtype",
        constraints=TableColumnConstraints(nullable=True, unique=True),
    )
    # Column with a description and a free-form "other" constraint.
    bar_column = TableColumn(
        name="bar",
        type="string",
        description="bar",
        constraints=TableColumnConstraints(nullable=False, other=["foo"]),
    )

    schema = TableSchema(
        columns=[foo_column, bar_column],
        constraints=TableConstraints(other=["foo"]),
    )

    assert isinstance(schema, TableSchema)
Beispiel #2
0
def test_table_metadata_value_schema_inference():
    """A table entry built from records alone exposes a schema with string/bool columns."""
    records = [
        TableRecord(name="foo", status=False),
        TableRecord(name="bar", status=True),
    ]
    # No schema is passed here; the test reads it back from the entry data.
    entry = MetadataEntry.table(records=records, label="foo")

    inferred = entry.entry_data.schema
    assert isinstance(inferred, TableSchema)

    expected_columns = [
        TableColumn(name="name", type="string"),
        TableColumn(name="status", type="bool"),
    ]
    assert inferred.columns == expected_columns
Beispiel #3
0
def test_table_column_values(key, value):
    """Replacing one valid ``TableColumn`` argument with ``value`` must raise ``CheckError``.

    ``key`` names the keyword argument to override and ``value`` is the
    replacement supplied by the caller of this test.
    """
    valid_kwargs = dict(
        name="foo",
        type="string",
        description="bar",
        constraints=TableColumnConstraints(other=["foo"]),
    )
    # Override (or add) exactly one entry on top of the valid baseline.
    overridden_kwargs = {**valid_kwargs, key: value}

    with pytest.raises(CheckError):
        TableColumn(**overridden_kwargs)
def test_pandera_schema_to_dagster_type(schema):
    """Converting ``schema`` yields a DagsterType carrying a single table-schema metadata entry."""

    def non_nullable_column(name, dtype, description, checks):
        # Every expected column is non-nullable and differs only in these fields.
        return TableColumn(
            name=name,
            type=dtype,
            description=description,
            constraints=TableColumnConstraints(nullable=False, other=checks),
        )

    converted = pandera_schema_to_dagster_type(schema)
    assert isinstance(converted, DagsterType)
    assert len(converted.metadata_entries) == 1

    entry = converted.metadata_entries[0]
    assert isinstance(entry.entry_data, TableSchemaMetadataValue)

    expected_schema = TableSchema(
        constraints=TableConstraints(other=["sum(a) > sum(b)"]),
        columns=[
            non_nullable_column("a", "int64", "a desc", ["<= 10"]),
            non_nullable_column("b", "float64", "b desc", ["< -1.2"]),
            non_nullable_column(
                "c",
                "str",
                "c desc",
                [
                    "str_startswith(value_)",
                    "Two words separated by underscore",
                ],
            ),
        ],
    )
    assert entry.entry_data.schema == expected_schema
Beispiel #5
0
def test_table_schema_values(key, value):
    """Replacing one valid ``TableSchema`` argument with ``value`` must raise ``CheckError``.

    ``key`` names the keyword argument to override and ``value`` is the
    replacement supplied by the caller of this test.
    """
    valid_column = TableColumn(
        name="foo",
        type="string",
        description="bar",
        constraints=TableColumnConstraints(other=["foo"]),
    )
    valid_kwargs = dict(
        constraints=TableConstraints(other=["foo"]),
        columns=[valid_column],
    )
    # Override (or add) exactly one entry on top of the valid baseline.
    overridden_kwargs = {**valid_kwargs, key: value}

    with pytest.raises(CheckError):
        TableSchema(**overridden_kwargs)
Beispiel #6
0
def _materialization_for_stream(
    name: str,
    stream_schema_props: Dict[str, Any],
    stream_stats: Dict[str, Any],
    asset_key_prefix: List[str],
) -> AssetMaterialization:
    """Build an ``AssetMaterialization`` describing one stream.

    Args:
        name: Stream name; appended to ``asset_key_prefix`` to form the asset key.
        stream_schema_props: Mapping of column name to a dict of column info.
            The ``"type"`` entry of each info dict is stringified; a missing
            entry is reported as ``"unknown"``.
        stream_stats: Extra metadata entries; those whose value is ``None``
            are omitted.
        asset_key_prefix: Leading components of the asset key.

    Returns:
        The materialization, with a ``"schema"`` table-schema metadata entry
        followed by the non-``None`` stats.
    """
    asset_key = asset_key_prefix + [name]

    columns = [
        TableColumn(name=column_name, type=str(column_info.get("type", "unknown")))
        for column_name, column_info in stream_schema_props.items()
    ]

    metadata = {"schema": MetadataValue.table_schema(TableSchema(columns=columns))}
    # Stats are merged last, so a stat sharing a key with an entry above replaces it.
    metadata.update(
        (stat_name, stat_value)
        for stat_name, stat_value in stream_stats.items()
        if stat_value is not None
    )

    return AssetMaterialization(asset_key=asset_key, metadata=metadata)
Beispiel #7
0
def _materialization_for_stream(
    name: str,
    stream_info: Dict[str, Any],
    stream_stats: Dict[str, Any],
    asset_key_prefix: List[str],
) -> AssetMaterialization:
    """Build an ``AssetMaterialization`` describing one stream.

    Args:
        name: Stream name; appended to ``asset_key_prefix`` to form the asset key.
        stream_info: Stream description; the column definitions are read from
            ``stream_info["stream"]["jsonSchema"]["properties"]``, and each
            definition must contain a ``"type"`` entry.
        stream_stats: Extra metadata entries; those whose value is ``None``
            are omitted.
        asset_key_prefix: Leading components of the asset key.

    Returns:
        The materialization, with a ``"schema"`` table-schema entry, a
        ``"columns"`` entry holding the comma-joined column names, and the
        non-``None`` stats.
    """
    asset_key = asset_key_prefix + [name]
    properties = stream_info["stream"]["jsonSchema"]["properties"]

    table_schema = TableSchema(
        columns=[
            TableColumn(name=column_name, type=str(column_info["type"]))
            for column_name, column_info in properties.items()
        ]
    )

    metadata = {
        "schema": MetadataValue.table_schema(table_schema),
        "columns": ",".join(properties.keys()),
    }
    # Stats are merged last, so a stat sharing a key with an entry above replaces it.
    metadata.update(
        (stat_name, stat_value)
        for stat_name, stat_value in stream_stats.items()
        if stat_value is not None
    )

    return AssetMaterialization(asset_key=asset_key, metadata=metadata)
Beispiel #8
0
def test_table_constraints_keys():
    """``TableConstraints`` must reject keyword arguments it does not define."""
    with pytest.raises(TypeError):
        # Target TableConstraints, the class this test is named for, so that
        # its keyword validation is the thing under test.
        TableConstraints(bad_key="foo")  # type: ignore
Beispiel #9
0
def test_table_column_keys():
    """Constructing a ``TableColumn`` with an unknown keyword raises ``TypeError``."""
    invalid_kwargs = {"bad_key": "foo", "description": "bar", "type": "string"}
    with pytest.raises(TypeError):
        TableColumn(**invalid_kwargs)  # type: ignore