def test_validate_constraints_ok():
    """Validation returns None when column 'foo' satisfies its 'object' type constraint."""
    object_typed_foo = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')])
    frame = DataFrame({'foo': ['bar', 'baz']})

    outcome = validate_constraints(frame, pandas_columns=[object_typed_foo])

    assert outcome is None
def test_missing_column_validation_with_optional_column():
    """A column declared with ``is_required=False`` may be absent from the dataframe."""
    optional_qux = PandasColumn(
        name="qux",
        constraints=[ColumnDTypeInSetConstraint({"object"})],
        is_required=False,
    )
    # The frame only carries "foo"; "qux" is deliberately missing.
    frame_without_qux = DataFrame({"foo": ["bar", "baz"]})

    outcome = validate_constraints(frame_without_qux, pandas_columns=[optional_qux])

    assert outcome is None
def test_dataframe_description_generation_just_type_constraint():
    """The generated description shows column 'foo' followed by its `int64` type."""
    int_typed_foo = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('int64')])
    frame_type = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[int_typed_foo],
    )

    expected_description = "\n### Columns\n**foo**: `int64`\n\n"
    assert frame_type.description == expected_description
def test_dataframe_description_generation_just_type_constraint():
    """The generated description shows column "foo" followed by its `int64` dtype."""
    int_typed_foo = PandasColumn(name="foo", constraints=[ColumnDTypeInSetConstraint({"int64"})])
    frame_type = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[int_typed_foo],
    )

    expected_description = "\n### Columns\n**foo**: `int64`\n\n"
    assert frame_type.description == expected_description
def test_create_pandas_dataframe_dagster_type():
    """The dataframe type factory returns a ``DagsterType`` instance."""
    int_typed_foo = PandasColumn(name="foo", constraints=[ColumnDTypeInSetConstraint({"int64"})])

    frame_type = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[int_typed_foo],
    )

    assert isinstance(frame_type, DagsterType)
def test_missing_column_validation():
    """Validating a frame that lacks a constrained column raises a violation."""
    qux_column = PandasColumn(name="qux", constraints=[ColumnDTypeInSetConstraint({"object"})])
    # The frame only carries "foo"; "qux" is deliberately missing.
    frame_without_qux = DataFrame({"foo": ["bar", "baz"]})
    expected_message = "Required column qux not in dataframe with columns"

    with pytest.raises(ConstraintViolationException, match=expected_message):
        validate_constraints(frame_without_qux, pandas_columns=[qux_column])
def test_missing_column_validation_with_optional_column():
    """A column declared with ``is_required=False`` may be absent from the dataframe."""
    optional_qux = PandasColumn(
        name='qux',
        constraints=[ColumnDTypeInSetConstraint({'object'})],
        is_required=False,
    )
    # The frame only carries 'foo'; 'qux' is deliberately missing.
    frame_without_qux = DataFrame({'foo': ['bar', 'baz']})

    outcome = validate_constraints(frame_without_qux, pandas_columns=[optional_qux])

    assert outcome is None
def test_create_pandas_dataframe_dagster_type():
    """The dataframe type factory returns a ``RuntimeType`` instance."""
    int_typed_foo = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('int64')])

    frame_type = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[int_typed_foo],
    )

    assert isinstance(frame_type, RuntimeType)
def test_missing_column_validation():
    """Validating a frame that lacks a constrained column raises a violation."""
    qux_column = PandasColumn(name='qux', constraints=[ColumnDTypeInSetConstraint({'object'})])
    # The frame only carries 'foo'; 'qux' is deliberately missing.
    frame_without_qux = DataFrame({'foo': ['bar', 'baz']})
    expected_message = "Required column qux not in dataframe with columns"

    with pytest.raises(ConstraintViolationException, match=expected_message):
        validate_constraints(frame_without_qux, pandas_columns=[qux_column])
def test_dataframe_description_generation_multi_constraints():
    """The description shows the type inline and each further constraint on its own '+' line."""
    foo_constraints = [
        ColumnTypeConstraint('int64'),
        InRangeColumnConstraint(0, 100),
        NonNullableColumnConstraint(),
    ]
    foo_column = PandasColumn(name='foo', constraints=foo_constraints)

    frame_type = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[foo_column],
    )

    expected_description = (
        "\n### Columns\n**foo**: `int64`\n+ 0 < values < 100\n+ No Null values allowed.\n\n"
    )
    assert frame_type.description == expected_description
def test_dataframe_description_generation_multi_constraints():
    """The description shows the dtype inline and each further constraint on its own '+' line."""
    foo_constraints = [
        ColumnDTypeInSetConstraint({"int64"}),
        InRangeColumnConstraint(0, 100, ignore_missing_vals=False),
        NonNullableColumnConstraint(),
    ]
    foo_column = PandasColumn(name="foo", constraints=foo_constraints)

    frame_type = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[foo_column],
    )

    expected_description = (
        "\n### Columns\n**foo**: `int64`\n+ 0 < values < 100\n+ No Null values allowed.\n\n"
    )
    assert frame_type.description == expected_description
PandasColumn(name='qux', constraints=[ColumnDTypeInSetConstraint({'object'})], is_required=False), ] dataframe = DataFrame({'foo': ['bar', 'baz']}) assert validate_constraints(dataframe, pandas_columns=column_constraints) is None @pytest.mark.parametrize( 'column_constraints, dataframe', [ ( [ PandasColumn( name='foo', constraints=[ColumnDTypeInSetConstraint({'int64'})]) ], DataFrame({'foo': ['bar', 'baz']}), ), ( [ PandasColumn( name='foo', constraints=[ColumnDTypeInSetConstraint({'object'})]) ], DataFrame({'bar': ['bar', 'baz']}), ), ], ) def test_validate_constraints_throw_error(column_constraints, dataframe):
), ] TripDataFrameSchema = [ PandasColumn.integer_column('bike_id', min_value=0), PandasColumn.datetime_column('start_time', min_datetime=Timestamp(year=2018, month=1, day=1),), PandasColumn.datetime_column('end_time', min_datetime=Timestamp(year=2018, month=1, day=1),), PandasColumn.string_column('interval_date'), ] RawTripDataFrame = create_dagster_pandas_dataframe_type( name='RawTripDataFrame', columns=[ PandasColumn(column.name) for column in TripDataFrameSchema if column.name != 'interval_date' ], ) TripDataFrame = create_dagster_pandas_dataframe_type( name='TripDataFrame', columns=TripDataFrameSchema, event_metadata_fn=compute_trip_dataframe_event_metadata, ) def compute_traffic_dataframe_event_metadata(dataframe): return [
def test_dataframe_description_generation_no_type_constraint():
    """A column without constraints is described by its bolded name only."""
    bare_foo = PandasColumn(name="foo")

    frame_type = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[bare_foo],
    )

    expected_description = "\n### Columns\n**foo**\n\n"
    assert frame_type.description == expected_description
def test_validate_collection_schema_ok():
    """Schema validation returns None when column 'foo' satisfies its 'object' type constraint."""
    object_typed_foo = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')])
    frame = DataFrame({'foo': ['bar', 'baz']})

    outcome = validate_collection_schema([object_typed_foo], frame)

    assert outcome is None


@pytest.mark.parametrize(
    'collection_schema, dataframe',
    [
        # 'foo' holds strings while the schema demands int64.
        (
            [PandasColumn(name='foo', constraints=[ColumnTypeConstraint('int64')])],
            DataFrame({'foo': ['bar', 'baz']}),
        ),
        # The schema constrains 'foo', but the frame only has 'bar'.
        (
            [PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')])],
            DataFrame({'bar': ['bar', 'baz']}),
        ),
    ],
)
def test_validate_collection_schema_throw_error(collection_schema, dataframe):
    """Each parametrized schema/frame pair must raise ``ConstraintViolationException``."""
    with pytest.raises(ConstraintViolationException):
        validate_collection_schema(collection_schema, dataframe)
def test_validate_collection_schema_ok():
    """Schema validation returns None when column 'foo' satisfies its 'object' type constraint."""
    object_typed_foo = PandasColumn(name='foo', constraints=[ColumnTypeConstraint('object')])
    frame = DataFrame({'foo': ['bar', 'baz']})

    outcome = validate_collection_schema([object_typed_foo], frame)

    assert outcome is None
def test_validate_constraints_ok():
    """Validation returns None when column "foo" satisfies its "object" dtype constraint."""
    object_typed_foo = PandasColumn(
        name="foo",
        constraints=[ColumnDTypeInSetConstraint({"object"})],
    )
    frame = DataFrame({"foo": ["bar", "baz"]})

    outcome = validate_constraints(frame, pandas_columns=[object_typed_foo])

    assert outcome is None
PandasColumn.integer_column("bike_id", min_value=0), PandasColumn.datetime_column( "start_time", min_datetime=Timestamp(year=2017, month=1, day=1), ), PandasColumn.datetime_column( "end_time", min_datetime=Timestamp(year=2017, month=1, day=1), ), PandasColumn.string_column("interval_date"), ] RawTripDataFrame = create_dagster_pandas_dataframe_type( name="RawTripDataFrame", columns=[ PandasColumn(column.name) for column in TripDataFrameSchema if column.name != "interval_date" ], ) TripDataFrame = create_dagster_pandas_dataframe_type( name="TripDataFrame", columns=TripDataFrameSchema, event_metadata_fn=compute_trip_dataframe_event_metadata, ) def compute_traffic_dataframe_event_metadata(dataframe): return [ EventMetadataEntry.text(str(min(dataframe["peak_traffic_load"])), "min_traffic_load", "Best Peak Load"),
def test_missing_column_validation_with_optional_column(): column_constraints = [ PandasColumn( name="qux", constraints=[ColumnDTypeInSetConstraint({"object"})], is_required=False ), ] dataframe = DataFrame({"foo": ["bar", "baz"]}) assert validate_constraints(dataframe, pandas_columns=column_constraints) is None @pytest.mark.parametrize( "column_constraints, dataframe", [ ( [PandasColumn(name="foo", constraints=[ColumnDTypeInSetConstraint({"int64"})])], DataFrame({"foo": ["bar", "baz"]}), ), ( [PandasColumn(name="foo", constraints=[ColumnDTypeInSetConstraint({"object"})])], DataFrame({"bar": ["bar", "baz"]}), ), ], ) def test_validate_constraints_throw_error(column_constraints, dataframe): with pytest.raises(ConstraintViolationException): validate_constraints(dataframe, pandas_columns=column_constraints) def test_shape_validation_ok(): assert (