def test_or(schema, file):
    """Check inclusive metrics evaluation of an OR over bounds on ``id``.

    ``id < 5 OR id >= 80`` must be rejected for ``file``, while
    ``id < 5 OR id >= 60`` must be accepted.
    """
    rejected = Expressions.or_(
        Expressions.less_than("id", 5),
        Expressions.greater_than_or_equal("id", 80),
    )
    assert not InclusiveMetricsEvaluator(schema, rejected).eval(file)

    accepted = Expressions.or_(
        Expressions.less_than("id", 5),
        Expressions.greater_than_or_equal("id", 60),
    )
    assert InclusiveMetricsEvaluator(schema, accepted).eval(file)
def test_or(strict_schema, strict_file):
    """Check strict metrics evaluation of an OR over bounds on ``id``.

    With the lower branch fixed at ``id < 5``, the upper branches
    ``id >= 80`` and ``id >= 60`` must both be rejected for ``strict_file``;
    only ``id >= 30`` must be accepted.
    """
    rejected_high = Expressions.or_(
        Expressions.less_than("id", 5),
        Expressions.greater_than_or_equal("id", 80),
    )
    assert not StrictMetricsEvaluator(strict_schema, rejected_high).eval(strict_file)

    rejected_mid = Expressions.or_(
        Expressions.less_than("id", 5),
        Expressions.greater_than_or_equal("id", 60),
    )
    assert not StrictMetricsEvaluator(strict_schema, rejected_mid).eval(strict_file)

    accepted = Expressions.or_(
        Expressions.less_than("id", 5),
        Expressions.greater_than_or_equal("id", 30),
    )
    assert StrictMetricsEvaluator(strict_schema, accepted).eval(strict_file)
def test_integer_lt(schema, file):
    """Check inclusive metrics evaluation of ``id < bound`` for several bounds.

    Bounds 5 and 30 must be rejected for ``file``; bounds 31 and 79 must be
    accepted.
    """
    # (upper bound, whether the evaluator is expected to accept the file)
    cases = ((5, False), (30, False), (31, True), (79, True))
    for upper_bound, should_match in cases:
        predicate = Expressions.less_than("id", upper_bound)
        outcome = InclusiveMetricsEvaluator(schema, predicate).eval(file)
        assert bool(outcome) == should_match
def test_multiple_references(assert_all_bound):
    """Bind ``(x == 7 AND y < 100) OR z > -100`` and check every reference is bound."""
    x_and_y = Expressions.and_(
        Expressions.equal("x", 7),
        Expressions.less_than("y", 100),
    )
    z_above = Expressions.greater_than("z", -100)
    unbound = Expressions.or_(x_and_y, z_above)

    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("Multiple references", bound)
def test_basic_simplification(assert_and_unwrap):
    """Check that binding simplifies expressions.

    Covers three cases: an OR expected to bind to ``always_true``, an AND
    expected to bind to ``always_false``, and a double negation expected to
    unwrap to a predicate on field id 1.
    """
    # The OR expression should simplify to always-true.
    expected_true = Expressions.always_true()
    bound_or = Binder.bind(
        STRUCT,
        Expressions.or_(
            Expressions.less_than("y", 100),
            Expressions.greater_than("z", -9999999999),
        ),
    )
    assert expected_true == bound_or

    # The AND expression should simplify to always-false.
    expected_false = Expressions.always_false()
    bound_and = Binder.bind(
        STRUCT,
        Expressions.and_(
            Expressions.less_than("y", 100),
            Expressions.less_than("z", -9999999999),
        ),
    )
    assert expected_false == bound_and

    # not(not(y < 100)) should unwrap to a predicate referencing field id 1.
    inner = Expressions.less_than("y", 100)
    double_negated = Expressions.not_(Expressions.not_(inner))
    unwrapped = assert_and_unwrap(Binder.bind(STRUCT, double_negated), None)
    assert 1 == unwrapped.ref.field_id
def test_zero_record_file(strict_schema, empty):
    """Check that the strict evaluator accepts ``empty`` for every predicate tried.

    Each comparison and null-check predicate on the ``no_stats`` column is
    evaluated in turn and must return a truthy result.
    """
    predicates = (
        Expressions.less_than("no_stats", 5),
        Expressions.less_than_or_equal("no_stats", 30),
        Expressions.equal("no_stats", 70),
        Expressions.greater_than("no_stats", 78),
        Expressions.greater_than_or_equal("no_stats", 90),
        Expressions.not_equal("no_stats", 101),
        Expressions.is_null("no_stats"),
        Expressions.not_null("no_stats"),
    )
    for predicate in predicates:
        evaluator = StrictMetricsEvaluator(strict_schema, predicate)
        assert evaluator.eval(empty)
def test_missing_stats(strict_schema, missing_stats):
    """Check that the strict evaluator rejects ``missing_stats`` for every predicate tried.

    Each comparison and null-check predicate on the ``no_stats`` column is
    evaluated in turn and must return a falsy result.
    """
    predicates = (
        Expressions.less_than("no_stats", 5),
        Expressions.less_than_or_equal("no_stats", 30),
        Expressions.equal("no_stats", 70),
        Expressions.greater_than("no_stats", 78),
        Expressions.greater_than_or_equal("no_stats", 90),
        Expressions.not_equal("no_stats", 101),
        Expressions.is_null("no_stats"),
        Expressions.not_null("no_stats"),
    )
    for predicate in predicates:
        matched = StrictMetricsEvaluator(strict_schema, predicate).eval(missing_stats)
        assert not matched
def test_or(assert_all_bound, assert_and_unwrap):
    """Bind ``z > -100 OR y < 100`` and check both children are bound correctly.

    The bound expression must unwrap to an ``Or`` whose left child
    references field id 2 (``z``) and whose right child references
    field id 1 (``y``).
    """
    z_above = Expressions.greater_than("z", -100)
    y_below = Expressions.less_than("y", 100)
    bound = Binder.bind(STRUCT, Expressions.or_(z_above, y_below))
    assert_all_bound("Or", bound)

    disjunction = assert_and_unwrap(bound, Or)

    # Left child: z should be bound to field id 2.
    z_side = assert_and_unwrap(disjunction.left, None)
    assert 2 == z_side.ref.field_id

    # Right child: y should be bound to field id 1.
    y_side = assert_and_unwrap(disjunction.right, None)
    assert 1 == y_side.ref.field_id
def test_and(assert_all_bound, assert_and_unwrap):
    """Bind ``x == 7 AND y < 100`` and check both children are bound correctly.

    The bound expression must unwrap to an ``And`` whose left child
    references field id 0 (``x``) and whose right child references
    field id 1 (``y``).
    """
    x_equals = Expressions.equal("x", 7)
    y_below = Expressions.less_than("y", 100)
    bound = Binder.bind(STRUCT, Expressions.and_(x_equals, y_below))
    assert_all_bound("And", bound)

    conjunction = assert_and_unwrap(bound, And)

    # Left child: x should be bound to field id 0.
    x_side = assert_and_unwrap(conjunction.left, None)
    assert 0 == x_side.ref.field_id

    # Right child: y should be bound to field id 1.
    y_side = assert_and_unwrap(conjunction.right, None)
    assert 1 == y_side.ref.field_id
def test_missing_column(inc_man_spec, inc_man_file):
    """Expect ``ValidationException`` when filtering on the column ``missing``."""
    # Every step stays inside the context manager so the exception is caught
    # whichever step raises it.
    with pytest.raises(ValidationException):
        predicate = Expressions.less_than("missing", 5)
        evaluator = InclusiveManifestEvaluator(inc_man_spec, predicate)
        evaluator.eval(inc_man_file)
def test_integer_lt(strict_schema, strict_file):
    """Check strict metrics evaluation of ``id < bound`` for several bounds.

    Bounds 5, 31 and 79 must be rejected for ``strict_file``; bound 80 must
    be accepted.
    """
    # (upper bound, whether the evaluator is expected to accept the file)
    cases = ((5, False), (31, False), (79, False), (80, True))
    for upper_bound, should_match in cases:
        predicate = Expressions.less_than("id", upper_bound)
        outcome = StrictMetricsEvaluator(strict_schema, predicate).eval(strict_file)
        assert bool(outcome) == should_match
def test_lt():
    """Check that the string ``"col_a < 1"`` converts to ``less_than("col_a", 1)``."""
    built_directly = Expressions.less_than("col_a", 1)
    parsed_from_text = Expressions.convert_string_to_expr("col_a < 1")

    assert built_directly == parsed_from_text
# under the License.

from iceberg.api.expressions import Expressions
from iceberg.parquet.dataset_utils import get_dataset_filter
import pyarrow.dataset as ds
import pytest


@pytest.mark.parametrize(
    "expr, dataset_filter, column_map",
    [(Expressions.greater_than('a', 1), ds.field('a') > 1, {'a': 'a'}),
     (Expressions.greater_than_or_equal('a', 1), ds.field('a') >= 1, {'a': 'a'}),
     (Expressions.less_than('a', 1), ds.field('a') < 1, {'a': 'a'}),
     (Expressions.less_than_or_equal('a', 1), ds.field('a') <= 1, {'a': 'a'}),
     (Expressions.equal('a', 1), ds.field('a') == 1, {'a': 'a'}),
     (Expressions.not_equal('a', 1), ds.field('a') != 1, {'a': 'a'}),
     (Expressions.not_null('a'), ds.field('a').is_valid(), {'a': 'a'}),
     (Expressions.is_null('a'), ~ds.field('a').is_valid(), {'a': 'a'})])
def test_simple(expr, dataset_filter, column_map):
    """Check that a single predicate translates to the expected pyarrow filter.

    Args:
        expr: Predicate on column ``a`` built with ``Expressions``.
        dataset_filter: The pyarrow dataset expression the translation
            should produce.
        column_map: Column-name mapping passed through to
            ``get_dataset_filter``.
    """
    translated_dataset_filter = get_dataset_filter(expr, column_map)
    # ``==`` on pyarrow expressions builds a new comparison expression rather
    # than returning a bool, so the comparison has to go through ``equals``.
    assert dataset_filter.equals(translated_dataset_filter)
def test_missing_column(strict_schema, strict_file):
    """Expect ``ValidationException`` when filtering on the column ``missing``."""
    # Every step stays inside the context manager so the exception is caught
    # whichever step raises it.
    with raises(ValidationException):
        predicate = Expressions.less_than("missing", 5)
        evaluator = StrictMetricsEvaluator(strict_schema, predicate)
        evaluator.eval(strict_file)
def test_not(schema, file):
    """Check inclusive metrics evaluation of negated bounds on ``id``.

    ``NOT (id < 5)`` must be accepted for ``file``; ``NOT (id > 5)`` must be
    rejected.
    """
    not_below = Expressions.not_(Expressions.less_than("id", 5))
    assert InclusiveMetricsEvaluator(schema, not_below).eval(file)

    not_above = Expressions.not_(Expressions.greater_than("id", 5))
    assert not InclusiveMetricsEvaluator(schema, not_above).eval(file)
def test_missing_column(schema, file):
    """Expect ``RuntimeError`` when filtering on the column ``missing``."""
    # Every step stays inside the context manager so the exception is caught
    # whichever step raises it.
    with raises(RuntimeError):
        predicate = Expressions.less_than("missing", 5)
        evaluator = InclusiveMetricsEvaluator(schema, predicate)
        evaluator.eval(file)
@pytest.fixture(scope="session", params=[ Operation.LT, Operation.LT_EQ, Operation.GT, Operation.GT_EQ, Operation.EQ, Operation.NOT_EQ ]) def op(request): yield request.param @pytest.fixture(scope="session", params=[ Expressions.always_false(), Expressions.always_true(), Expressions.less_than("x", 5), Expressions.less_than_or_equal("y", -3), Expressions.greater_than("z", 0), Expressions.greater_than_or_equal("t", 129), Expressions.equal("col", "data"), Expressions.not_equal("col", "abc"), Expressions.not_null("maybeNull"), Expressions.is_null("maybeNull2"), Expressions.not_(Expressions.greater_than("a", 10)), Expressions.and_(Expressions.greater_than_or_equal("a", 0), Expressions.less_than("a", 3)), Expressions.or_(Expressions.less_than("a", 0), Expressions.greater_than("a", 10)), Expressions.equal("a", 5).bind(exp_schema.as_struct()) ]) def expression(request):
def test_not(strict_schema, strict_file):
    """Check strict metrics evaluation of negated bounds on ``id``.

    ``NOT (id < 5)`` must be accepted for ``strict_file``; ``NOT (id > 5)``
    must be rejected.
    """
    not_below = Expressions.not_(Expressions.less_than("id", 5))
    assert StrictMetricsEvaluator(strict_schema, not_below).eval(strict_file)

    not_above = Expressions.not_(Expressions.greater_than("id", 5))
    assert not StrictMetricsEvaluator(strict_schema, not_above).eval(strict_file)
@pytest.mark.parametrize("expression,expected", [ (Expressions.is_null("all_nulls"), True), (Expressions.is_null("some_nulls"), True), (Expressions.is_null("no_nulls"), False)]) def test_no_nulls(inc_man_spec, inc_man_file, expression, expected): assert InclusiveManifestEvaluator(inc_man_spec, expression).eval(inc_man_file) == expected def test_missing_column(inc_man_spec, inc_man_file): with pytest.raises(ValidationException): InclusiveManifestEvaluator(inc_man_spec, Expressions.less_than("missing", 5)).eval(inc_man_file) @pytest.mark.parametrize("expression", [ Expressions.less_than("id", 5), Expressions.less_than_or_equal("id", 30), Expressions.equal("id", 70), Expressions.greater_than("id", 78), Expressions.greater_than_or_equal("id", 90), Expressions.not_equal("id", 101), Expressions.less_than_or_equal("id", 30), Expressions.is_null("id"), Expressions.not_null("id")]) def test_missing_stats(inc_man_spec, inc_man_file_ns, expression): assert InclusiveManifestEvaluator(inc_man_spec, expression).eval(inc_man_file_ns) @pytest.mark.parametrize("expression, expected", [ (Expressions.less_than("id", 5), True), (Expressions.greater_than("id", 5), False)])
def test_int_lt(inc_man_spec, inc_man_file, val, expected):
    """Check that evaluating ``id < val`` against ``inc_man_file`` equals ``expected``."""
    predicate = Expressions.less_than("id", val)
    evaluator = InclusiveManifestEvaluator(inc_man_spec, predicate)
    outcome = evaluator.eval(inc_man_file)

    assert outcome == expected
(Expressions.is_null("some_nulls"), True), (Expressions.is_null("no_nulls"), False)]) def test_no_nulls(inc_man_spec, inc_man_file, expression, expected): assert InclusiveManifestEvaluator( inc_man_spec, expression).eval(inc_man_file) == expected def test_missing_column(inc_man_spec, inc_man_file): with pytest.raises(ValidationException): InclusiveManifestEvaluator(inc_man_spec, Expressions.less_than("missing", 5)).eval(inc_man_file) @pytest.mark.parametrize("expression", [ Expressions.less_than("id", 5), Expressions.less_than_or_equal("id", 30), Expressions.equal("id", 70), Expressions.greater_than("id", 78), Expressions.greater_than_or_equal("id", 90), Expressions.not_equal("id", 101), Expressions.less_than_or_equal("id", 30), Expressions.is_null("id"), Expressions.not_null("id") ]) def test_missing_stats(inc_man_spec, inc_man_file_ns, expression): assert InclusiveManifestEvaluator(inc_man_spec, expression).eval(inc_man_file_ns) @pytest.mark.parametrize("expression, expected",