Ejemplo n.º 1
0
def test_validate_constraints_ok():
    """validate_constraints returns None when 'foo' is checked against an 'object' type constraint."""
    frame = DataFrame({'foo': ['bar', 'baz']})
    columns = [
        PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')]),
    ]

    outcome = validate_constraints(frame, pandas_columns=columns)

    assert outcome is None
Ejemplo n.º 2
0
def test_missing_column_validation_with_optional_column():
    """A column declared with is_required=False may be absent from the dataframe."""
    # The frame only contains "foo"; the declared column "qux" is not present.
    frame = DataFrame({"foo": ["bar", "baz"]})
    optional_qux = PandasColumn(
        name="qux",
        constraints=[ColumnDTypeInSetConstraint({"object"})],
        is_required=False,
    )

    outcome = validate_constraints(frame, pandas_columns=[optional_qux])

    assert outcome is None
def test_dataframe_description_generation_just_type_constraint():
    """The generated description lists column 'foo' together with its 'int64' type constraint."""
    expected_description = "\n### Columns\n**foo**: `int64`\n\n"
    foo_column = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('int64')])

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[foo_column],
    )

    assert TestDataFrame.description == expected_description
Ejemplo n.º 4
0
def test_dataframe_description_generation_just_type_constraint():
    """The generated description lists column "foo" together with its "int64" dtype constraint."""
    expected_description = "\n### Columns\n**foo**: `int64`\n\n"
    foo_column = PandasColumn(
        name="foo",
        constraints=[ColumnDTypeInSetConstraint({"int64"})],
    )

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[foo_column],
    )

    assert TestDataFrame.description == expected_description
Ejemplo n.º 5
0
def test_create_pandas_dataframe_dagster_type():
    """create_dagster_pandas_dataframe_type returns an instance of DagsterType."""
    foo_column = PandasColumn(
        name="foo",
        constraints=[ColumnDTypeInSetConstraint({"int64"})],
    )

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[foo_column],
    )

    assert isinstance(TestDataFrame, DagsterType)
Ejemplo n.º 6
0
def test_missing_column_validation():
    """Validating a frame that lacks the declared column "qux" raises ConstraintViolationException."""
    expected_message = "Required column qux not in dataframe with columns"
    # The frame only contains "foo"; the declared column "qux" is not present.
    frame = DataFrame({"foo": ["bar", "baz"]})
    columns = [
        PandasColumn(name="qux", constraints=[ColumnDTypeInSetConstraint({"object"})]),
    ]

    with pytest.raises(ConstraintViolationException, match=expected_message):
        validate_constraints(frame, pandas_columns=columns)
Ejemplo n.º 7
0
def test_missing_column_validation_with_optional_column():
    """A column declared with is_required=False may be absent from the dataframe."""
    # The frame only contains 'foo'; the declared column 'qux' is not present.
    frame = DataFrame({'foo': ['bar', 'baz']})
    optional_qux = PandasColumn(
        name='qux',
        constraints=[ColumnDTypeInSetConstraint({'object'})],
        is_required=False,
    )

    outcome = validate_constraints(frame, pandas_columns=[optional_qux])

    assert outcome is None
def test_create_pandas_dataframe_dagster_type():
    """create_dagster_pandas_dataframe_type returns an instance of RuntimeType."""
    foo_column = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('int64')])

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[foo_column],
    )

    assert isinstance(TestDataFrame, RuntimeType)
Ejemplo n.º 9
0
def test_missing_column_validation():
    """Validating a frame that lacks the declared column 'qux' raises ConstraintViolationException."""
    expected_message = "Required column qux not in dataframe with columns"
    # The frame only contains 'foo'; the declared column 'qux' is not present.
    frame = DataFrame({'foo': ['bar', 'baz']})
    columns = [
        PandasColumn(name='qux', constraints=[ColumnDTypeInSetConstraint({'object'})]),
    ]

    with pytest.raises(ConstraintViolationException, match=expected_message):
        validate_constraints(frame, pandas_columns=columns)
def test_dataframe_description_generation_multi_constraints():
    """The description shows the type inline and each further constraint as a '+' bullet line."""
    expected_description = (
        "\n### Columns\n**foo**: `int64`\n+ 0 < values < 100\n+ No Null values allowed.\n\n"
    )
    foo_constraints = [
        ColumnTypeConstraint('int64'),
        InRangeColumnConstraint(0, 100),
        NonNullableColumnConstraint(),
    ]

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[PandasColumn(name='foo', constraints=foo_constraints)],
    )

    assert TestDataFrame.description == expected_description
Ejemplo n.º 11
0
def test_dataframe_description_generation_multi_constraints():
    """The description shows the dtype inline and each further constraint as a '+' bullet line."""
    expected_description = (
        "\n### Columns\n**foo**: `int64`\n+ 0 < values < 100\n+ No Null values allowed.\n\n"
    )
    foo_constraints = [
        ColumnDTypeInSetConstraint({"int64"}),
        InRangeColumnConstraint(0, 100, ignore_missing_vals=False),
        NonNullableColumnConstraint(),
    ]

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[PandasColumn(name="foo", constraints=foo_constraints)],
    )

    assert TestDataFrame.description == expected_description
Ejemplo n.º 12
0
        PandasColumn(name='qux',
                     constraints=[ColumnDTypeInSetConstraint({'object'})],
                     is_required=False),
    ]
    dataframe = DataFrame({'foo': ['bar', 'baz']})
    assert validate_constraints(dataframe,
                                pandas_columns=column_constraints) is None


@pytest.mark.parametrize(
    'column_constraints, dataframe',
    [
        (
            [
                PandasColumn(
                    name='foo',
                    constraints=[ColumnDTypeInSetConstraint({'int64'})])
            ],
            DataFrame({'foo': ['bar', 'baz']}),
        ),
        (
            [
                PandasColumn(
                    name='foo',
                    constraints=[ColumnDTypeInSetConstraint({'object'})])
            ],
            DataFrame({'bar': ['bar', 'baz']}),
        ),
    ],
)
def test_validate_constraints_throw_error(column_constraints, dataframe):
Ejemplo n.º 13
0
        ),
    ]


# Column declarations shared by the trip dataframe types defined below.
TripDataFrameSchema = [
    PandasColumn.integer_column('bike_id', min_value=0),
    PandasColumn.datetime_column(
        'start_time',
        min_datetime=Timestamp(year=2018, month=1, day=1),
    ),
    PandasColumn.datetime_column(
        'end_time',
        min_datetime=Timestamp(year=2018, month=1, day=1),
    ),
    PandasColumn.string_column('interval_date'),
]


# Raw trips reuse the schema's column names only (each column is declared by
# name alone), and 'interval_date' is left out.
RawTripDataFrame = create_dagster_pandas_dataframe_type(
    name='RawTripDataFrame',
    columns=[
        PandasColumn(schema_column.name)
        for schema_column in TripDataFrameSchema
        if schema_column.name != 'interval_date'
    ],
)


# Trips use the full schema and attach event metadata via
# compute_trip_dataframe_event_metadata.
TripDataFrame = create_dagster_pandas_dataframe_type(
    name='TripDataFrame',
    columns=TripDataFrameSchema,
    event_metadata_fn=compute_trip_dataframe_event_metadata,
)


def compute_traffic_dataframe_event_metadata(dataframe):
    return [
Ejemplo n.º 14
0
def test_dataframe_description_generation_no_type_constraint():
    """A column declared with only a name is described by its bolded name alone."""
    expected_description = "\n### Columns\n**foo**\n\n"
    name_only_column = PandasColumn(name="foo")

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[name_only_column],
    )

    assert TestDataFrame.description == expected_description
Ejemplo n.º 15
0

def test_validate_collection_schema_ok():
    """validate_collection_schema returns None when 'foo' is checked against an 'object' type constraint."""
    frame = DataFrame({'foo': ['bar', 'baz']})
    schema = [
        PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')]),
    ]

    outcome = validate_collection_schema(schema, frame)

    assert outcome is None


@pytest.mark.parametrize(
    ('collection_schema', 'dataframe'),
    [
        # Column 'foo' holds the strings 'bar'/'baz' but is constrained to 'int64'.
        (
            [PandasColumn(name='foo', constraints=[ColumnTypeConstraint('int64')])],
            DataFrame({'foo': ['bar', 'baz']}),
        ),
        # The schema declares 'foo', but the frame only has a 'bar' column.
        (
            [PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')])],
            DataFrame({'bar': ['bar', 'baz']}),
        ),
    ],
)
def test_validate_collection_schema_throw_error(collection_schema, dataframe):
    """Each parametrized schema/frame pair must raise ConstraintViolationException."""
    with pytest.raises(ConstraintViolationException):
        validate_collection_schema(collection_schema, dataframe)
Ejemplo n.º 16
0
def test_validate_collection_schema_ok():
    """validate_collection_schema returns None when 'foo' is checked against an 'object' type constraint."""
    frame = DataFrame({'foo': ['bar', 'baz']})
    schema = [
        PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')]),
    ]

    outcome = validate_collection_schema(schema, frame)

    assert outcome is None
Ejemplo n.º 17
0
def test_validate_constraints_ok():
    """validate_constraints returns None when "foo" is checked against the {"object"} dtype set."""
    frame = DataFrame({"foo": ["bar", "baz"]})
    columns = [
        PandasColumn(name="foo", constraints=[ColumnDTypeInSetConstraint({"object"})]),
    ]

    outcome = validate_constraints(frame, pandas_columns=columns)

    assert outcome is None
Ejemplo n.º 18
0
    PandasColumn.integer_column("bike_id", min_value=0),
    PandasColumn.datetime_column(
        "start_time",
        min_datetime=Timestamp(year=2017, month=1, day=1),
    ),
    PandasColumn.datetime_column(
        "end_time",
        min_datetime=Timestamp(year=2017, month=1, day=1),
    ),
    PandasColumn.string_column("interval_date"),
]

# Raw trips reuse the schema's column names only (each column is declared by
# name alone), and "interval_date" is left out.
RawTripDataFrame = create_dagster_pandas_dataframe_type(
    name="RawTripDataFrame",
    columns=[
        PandasColumn(schema_column.name)
        for schema_column in TripDataFrameSchema
        if schema_column.name != "interval_date"
    ],
)

# Trips use the full schema and attach event metadata via
# compute_trip_dataframe_event_metadata.
TripDataFrame = create_dagster_pandas_dataframe_type(
    name="TripDataFrame",
    columns=TripDataFrameSchema,
    event_metadata_fn=compute_trip_dataframe_event_metadata,
)


def compute_traffic_dataframe_event_metadata(dataframe):
    return [
        EventMetadataEntry.text(str(min(dataframe["peak_traffic_load"])),
                                "min_traffic_load", "Best Peak Load"),
Ejemplo n.º 19
0
def test_missing_column_validation_with_optional_column():
    """A column declared with is_required=False may be absent from the dataframe."""
    # The frame only contains "foo"; the declared column "qux" is not present.
    frame = DataFrame({"foo": ["bar", "baz"]})
    optional_qux = PandasColumn(
        name="qux",
        constraints=[ColumnDTypeInSetConstraint({"object"})],
        is_required=False,
    )

    outcome = validate_constraints(frame, pandas_columns=[optional_qux])

    assert outcome is None


@pytest.mark.parametrize(
    ("column_constraints", "dataframe"),
    [
        # Column "foo" holds the strings "bar"/"baz" but is constrained to "int64".
        (
            [
                PandasColumn(
                    name="foo",
                    constraints=[ColumnDTypeInSetConstraint({"int64"})],
                )
            ],
            DataFrame({"foo": ["bar", "baz"]}),
        ),
        # The constraints declare "foo", but the frame only has a "bar" column.
        (
            [
                PandasColumn(
                    name="foo",
                    constraints=[ColumnDTypeInSetConstraint({"object"})],
                )
            ],
            DataFrame({"bar": ["bar", "baz"]}),
        ),
    ],
)
def test_validate_constraints_throw_error(column_constraints, dataframe):
    """Each parametrized constraints/frame pair must raise ConstraintViolationException."""
    with pytest.raises(ConstraintViolationException):
        validate_constraints(dataframe, pandas_columns=column_constraints)


def test_shape_validation_ok():
    assert (