Example #1
0
def test_shape_validation_throw_error():
    """A one-row frame checked with ``RowCountConstraint(2)`` must be rejected.

    The column schema itself matches the data ('foo' integer >= 0, 'bar'
    string), so the dataframe-level constraint is what the test exercises.
    """
    with pytest.raises(ConstraintViolationException):
        # Everything is built inside the ``raises`` block, mirroring the
        # single-expression form of the call.
        schema = [
            PandasColumn.integer_column('foo', min_value=0),
            PandasColumn.string_column('bar'),
        ]
        single_row_frame = DataFrame({'foo': [2], 'bar': ['hello']})
        frame_constraints = [RowCountConstraint(2)]
        validate_collection_schema(
            schema,
            single_row_frame,
            dataframe_constraints=frame_constraints,
        )
    def _dagster_type_check(value):
        """Check that ``value`` is a DataFrame satisfying the column schema.

        Returns a ``TypeCheck``: a failure when ``value`` is not a
        ``DataFrame`` or when ``validate_collection_schema`` raises a
        ``ConstraintViolationException`` (its text becomes the description),
        otherwise a success. On success, summary-statistic metadata entries
        are attached only when ``summary_statistics`` is truthy.

        NOTE(review): ``columns``, ``name`` and ``summary_statistics`` are not
        defined in this function; presumably they are closed over from the
        enclosing scope -- confirm against the surrounding definition.
        """
        # Guard: anything that is not a DataFrame fails immediately.
        if not isinstance(value, DataFrame):
            message = 'Must be a pandas.DataFrame. Got value of type. {type_name}'.format(
                type_name=type(value).__name__
            )
            return TypeCheck(success=False, description=message)

        # Schema validation is skipped entirely when no columns were supplied.
        if columns is not None:
            try:
                validate_collection_schema(columns, value)
            except ConstraintViolationException as violation:
                return TypeCheck(success=False, description=str(violation))

        if summary_statistics:
            entries = _execute_summary_stats(name, value, summary_statistics)
        else:
            entries = None
        return TypeCheck(success=True, metadata_entries=entries)
Example #3
0
def test_shape_validation_ok():
    """A one-row frame checked with ``RowCountConstraint(1)`` validates cleanly.

    ``validate_collection_schema`` is expected to return ``None`` here.
    """
    schema = [
        PandasColumn.integer_column('foo', min_value=0),
        PandasColumn.string_column('bar'),
    ]
    single_row_frame = DataFrame({'foo': [2], 'bar': ['hello']})
    outcome = validate_collection_schema(
        schema,
        single_row_frame,
        dataframe_constraints=[RowCountConstraint(1)],
    )
    assert outcome is None
Example #4
0
def test_validate_collection_schema_throw_error(collection_schema, dataframe):
    """Validating ``dataframe`` against ``collection_schema`` must raise.

    NOTE(review): both arguments are presumably supplied by a pytest
    parametrization or fixtures not visible here -- confirm.
    """
    expect_violation = pytest.raises(ConstraintViolationException)
    with expect_violation:
        validate_collection_schema(collection_schema, dataframe)
Example #5
0
def test_validate_collection_schema_ok():
    """A string column typed as ``'object'`` passes schema validation.

    ``validate_collection_schema`` is expected to return ``None`` here.
    """
    object_typed_foo = PandasColumn(
        name='foo',
        constraints=[ColumnTypeConstraint('object')],
    )
    frame = DataFrame({'foo': ['bar', 'baz']})
    outcome = validate_collection_schema([object_typed_foo], frame)
    assert outcome is None