def test_or(schema, file):
    """Check inclusive metrics evaluation of OR expressions on ``id``."""

    def matches(lower_bound):
        # Both assertions share the ``id < 5`` branch; only the bound of the
        # ``id >= ...`` branch varies.
        disjunction = Expressions.or_(
            Expressions.less_than("id", 5),
            Expressions.greater_than_or_equal("id", lower_bound))
        return InclusiveMetricsEvaluator(schema, disjunction).eval(file)

    assert not matches(80)
    assert matches(60)
def test_or(strict_schema, strict_file):
    """Check strict metrics evaluation of OR expressions on ``id``."""

    def matches(lower_bound):
        # ``id < 5`` is the fixed left branch; the right branch is
        # ``id >= lower_bound``.
        disjunction = Expressions.or_(
            Expressions.less_than("id", 5),
            Expressions.greater_than_or_equal("id", lower_bound))
        return StrictMetricsEvaluator(strict_schema, disjunction).eval(strict_file)

    assert not matches(80)
    assert not matches(60)
    assert matches(30)
def test_integer_lt(schema, file):
    """Check inclusive metrics evaluation of ``id < literal`` for several literals."""
    # (literal, whether the evaluator is expected to report a possible match)
    cases = ((5, False), (30, False), (31, True), (79, True))
    for literal, should_match in cases:
        predicate = Expressions.less_than("id", literal)
        outcome = InclusiveMetricsEvaluator(schema, predicate).eval(file)
        assert bool(outcome) is should_match
def test_multiple_references(assert_all_bound):
    """Bind an expression that references x, y and z and check every part is bound."""
    x_and_y = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    z_clause = Expressions.greater_than("z", -100)
    bound = Binder.bind(STRUCT, Expressions.or_(x_and_y, z_clause))

    assert_all_bound("Multiple references", bound)
def test_basic_simplification(assert_and_unwrap):
    """Check the simplifications expected from ``Binder.bind`` against STRUCT."""
    # Should simplify the OR expression to alwaysTrue
    or_expr = Expressions.or_(Expressions.less_than("y", 100),
                              Expressions.greater_than("z", -9999999999))
    assert Expressions.always_true() == Binder.bind(STRUCT, or_expr)

    # Should simplify the AND expression to alwaysFalse
    and_expr = Expressions.and_(Expressions.less_than("y", 100),
                                Expressions.less_than("z", -9999999999))
    assert Expressions.always_false() == Binder.bind(STRUCT, and_expr)

    # A doubly negated predicate should unwrap to a predicate on field id 1
    double_negation = Expressions.not_(
        Expressions.not_(Expressions.less_than("y", 100)))
    predicate = assert_and_unwrap(Binder.bind(STRUCT, double_negation), None)
    assert 1 == predicate.ref.field_id
def test_zero_record_file(strict_schema, empty):
    """Every listed predicate on ``no_stats`` must evaluate truthy for ``empty``."""
    predicates = (
        Expressions.less_than("no_stats", 5),
        Expressions.less_than_or_equal("no_stats", 30),
        Expressions.equal("no_stats", 70),
        Expressions.greater_than("no_stats", 78),
        Expressions.greater_than_or_equal("no_stats", 90),
        Expressions.not_equal("no_stats", 101),
        Expressions.is_null("no_stats"),
        Expressions.not_null("no_stats"),
    )
    # all() stops at the first falsy result, just like a failing assert in a loop.
    assert all(StrictMetricsEvaluator(strict_schema, predicate).eval(empty)
               for predicate in predicates)
def test_missing_stats(strict_schema, missing_stats):
    """Every listed predicate on ``no_stats`` must evaluate falsy for ``missing_stats``."""
    predicates = (
        Expressions.less_than("no_stats", 5),
        Expressions.less_than_or_equal("no_stats", 30),
        Expressions.equal("no_stats", 70),
        Expressions.greater_than("no_stats", 78),
        Expressions.greater_than_or_equal("no_stats", 90),
        Expressions.not_equal("no_stats", 101),
        Expressions.is_null("no_stats"),
        Expressions.not_null("no_stats"),
    )
    # any() stops at the first truthy result, just like a failing assert in a loop.
    assert not any(StrictMetricsEvaluator(strict_schema, predicate).eval(missing_stats)
                   for predicate in predicates)
def test_or(assert_all_bound, assert_and_unwrap):
    """Bind an OR over z and y and check both children reference the right fields."""
    unbound = Expressions.or_(Expressions.greater_than("z", -100),
                              Expressions.less_than("y", 100))
    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("Or", bound)

    disjunction = assert_and_unwrap(bound, Or)

    # should bind z correctly
    z_side = assert_and_unwrap(disjunction.left, None)
    assert z_side.ref.field_id == 2

    # should bind y correctly
    y_side = assert_and_unwrap(disjunction.right, None)
    assert y_side.ref.field_id == 1
def test_and(assert_all_bound, assert_and_unwrap):
    """Bind an AND over x and y and check both children reference the right fields."""
    unbound = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("And", bound)

    conjunction = assert_and_unwrap(bound, And)

    # should bind x correctly
    x_side = assert_and_unwrap(conjunction.left, None)
    assert x_side.ref.field_id == 0

    # should bind y correctly
    y_side = assert_and_unwrap(conjunction.right, None)
    assert y_side.ref.field_id == 1
def test_missing_column(inc_man_spec, inc_man_file):
    """A predicate on the column "missing" must raise ValidationException."""
    with pytest.raises(ValidationException):
        unknown_column = Expressions.less_than("missing", 5)
        evaluator = InclusiveManifestEvaluator(inc_man_spec, unknown_column)
        evaluator.eval(inc_man_file)
def test_integer_lt(strict_schema, strict_file):
    """Check strict metrics evaluation of ``id < literal`` for several literals."""

    def evaluate(literal):
        predicate = Expressions.less_than("id", literal)
        return StrictMetricsEvaluator(strict_schema, predicate).eval(strict_file)

    for literal in (5, 31, 79):
        assert not evaluate(literal)
    assert evaluate(80)
# Example #12
def test_lt():
    """Parsing "col_a < 1" must yield the same expression as ``less_than``."""
    reference = Expressions.less_than("col_a", 1)
    parsed = Expressions.convert_string_to_expr("col_a < 1")
    assert reference == parsed
# under the License.

from iceberg.api.expressions import Expressions
from iceberg.parquet.dataset_utils import get_dataset_filter
import pyarrow.dataset as ds
import pytest


@pytest.mark.parametrize(
    "expr, dataset_filter, column_map",
    [
        (Expressions.greater_than('a', 1), ds.field('a') > 1, {'a': 'a'}),
        (Expressions.greater_than_or_equal('a', 1), ds.field('a') >= 1, {'a': 'a'}),
        (Expressions.less_than('a', 1), ds.field('a') < 1, {'a': 'a'}),
        (Expressions.less_than_or_equal('a', 1), ds.field('a') <= 1, {'a': 'a'}),
        (Expressions.equal('a', 1), ds.field('a') == 1, {'a': 'a'}),
        (Expressions.not_equal('a', 1), ds.field('a') != 1, {'a': 'a'}),
        (Expressions.not_null('a'), ds.field('a').is_valid(), {'a': 'a'}),
        (Expressions.is_null('a'), ~ds.field('a').is_valid(), {'a': 'a'}),
    ])
def test_simple(expr, dataset_filter, column_map):
    """Check that a single predicate translates to the expected dataset filter.

    ``expr`` is the expression handed to ``get_dataset_filter`` together with
    ``column_map``; ``dataset_filter`` is the pyarrow dataset expression the
    translation is expected to produce.
    """
    translated_dataset_filter = get_dataset_filter(expr, column_map)
    # The translated filter used to be computed but never checked, so the test
    # could not fail. Use ``equals`` rather than ``==``: comparison operators on
    # dataset expressions build new expressions (see the parametrization above)
    # instead of returning a bool.
    assert dataset_filter.equals(translated_dataset_filter)
def test_missing_column(strict_schema, strict_file):
    """A predicate on the column "missing" must raise ValidationException."""
    with raises(ValidationException):
        unknown_column = Expressions.less_than("missing", 5)
        evaluator = StrictMetricsEvaluator(strict_schema, unknown_column)
        evaluator.eval(strict_file)
def test_not(schema, file):
    """Check inclusive metrics evaluation of negated comparisons on ``id``."""
    not_less_than = Expressions.not_(Expressions.less_than("id", 5))
    assert InclusiveMetricsEvaluator(schema, not_less_than).eval(file)

    not_greater_than = Expressions.not_(Expressions.greater_than("id", 5))
    assert not InclusiveMetricsEvaluator(schema, not_greater_than).eval(file)
def test_missing_column(schema, file):
    """A predicate on the column "missing" must raise RuntimeError."""
    with raises(RuntimeError):
        unknown_column = Expressions.less_than("missing", 5)
        evaluator = InclusiveMetricsEvaluator(schema, unknown_column)
        evaluator.eval(file)

@pytest.fixture(
    scope="session",
    params=[
        Operation.LT,
        Operation.LT_EQ,
        Operation.GT,
        Operation.GT_EQ,
        Operation.EQ,
        Operation.NOT_EQ,
    ],
)
def op(request):
    """Session-scoped fixture yielding each listed comparison ``Operation`` in turn."""
    yield request.param


@pytest.fixture(scope="session",
                params=[
                    Expressions.always_false(),
                    Expressions.always_true(),
                    Expressions.less_than("x", 5),
                    Expressions.less_than_or_equal("y", -3),
                    Expressions.greater_than("z", 0),
                    Expressions.greater_than_or_equal("t", 129),
                    Expressions.equal("col", "data"),
                    Expressions.not_equal("col", "abc"),
                    Expressions.not_null("maybeNull"),
                    Expressions.is_null("maybeNull2"),
                    Expressions.not_(Expressions.greater_than("a", 10)),
                    Expressions.and_(Expressions.greater_than_or_equal("a", 0),
                                     Expressions.less_than("a", 3)),
                    Expressions.or_(Expressions.less_than("a", 0),
                                    Expressions.greater_than("a", 10)),
                    Expressions.equal("a", 5).bind(exp_schema.as_struct())
                ])
def expression(request):
    """Session-scoped fixture yielding each parametrized expression in turn.

    The parameters are the constant expressions, unbound comparison and null
    predicates, not/and/or combinations, and one predicate bound to
    ``exp_schema`` listed in the decorator.
    """
    # The function previously had no body, which is a syntax error. Yield the
    # current parameter, mirroring the sibling ``op`` fixture.
    yield request.param
def test_not(strict_schema, strict_file):
    """Check strict metrics evaluation of negated comparisons on ``id``."""
    not_less_than = Expressions.not_(Expressions.less_than("id", 5))
    assert StrictMetricsEvaluator(strict_schema, not_less_than).eval(strict_file)

    not_greater_than = Expressions.not_(Expressions.greater_than("id", 5))
    assert not StrictMetricsEvaluator(strict_schema, not_greater_than).eval(strict_file)
@pytest.mark.parametrize(
    "expression,expected",
    [
        (Expressions.is_null("all_nulls"), True),
        (Expressions.is_null("some_nulls"), True),
        (Expressions.is_null("no_nulls"), False),
    ])
def test_no_nulls(inc_man_spec, inc_man_file, expression, expected):
    """Check ``is_null`` evaluation against the manifest for each listed column."""
    evaluator = InclusiveManifestEvaluator(inc_man_spec, expression)
    outcome = evaluator.eval(inc_man_file)
    assert outcome == expected


def test_missing_column(inc_man_spec, inc_man_file):
    """A predicate on the column "missing" must raise ValidationException."""
    with pytest.raises(ValidationException):
        unknown_column = Expressions.less_than("missing", 5)
        evaluator = InclusiveManifestEvaluator(inc_man_spec, unknown_column)
        evaluator.eval(inc_man_file)


# Each predicate appears exactly once; the list previously contained
# ``less_than_or_equal("id", 30)`` twice, which only re-ran the same case.
@pytest.mark.parametrize("expression", [
    Expressions.less_than("id", 5),
    Expressions.less_than_or_equal("id", 30),
    Expressions.equal("id", 70),
    Expressions.greater_than("id", 78),
    Expressions.greater_than_or_equal("id", 90),
    Expressions.not_equal("id", 101),
    Expressions.is_null("id"),
    Expressions.not_null("id")])
def test_missing_stats(inc_man_spec, inc_man_file_ns, expression):
    """Every listed predicate on ``id`` must evaluate truthy for ``inc_man_file_ns``."""
    assert InclusiveManifestEvaluator(inc_man_spec, expression).eval(inc_man_file_ns)


# The decorator previously declared "expression, expected" and supplied whole
# expressions, while the function takes ``val`` and builds ``id < val`` itself;
# pytest rejects that mismatch at collection time.
# NOTE(review): the literals and expected results below mirror the inclusive
# metrics ``test_integer_lt`` cases -- confirm them against the bounds of the
# ``inc_man_file`` fixture.
@pytest.mark.parametrize("val, expected", [
    (5, False),
    (30, False),
    (31, True),
    (79, True)])
def test_int_lt(inc_man_spec, inc_man_file, val, expected):
    """Check manifest evaluation of ``id < val`` against the expected outcome."""
    assert InclusiveManifestEvaluator(inc_man_spec, Expressions.less_than("id", val)).eval(inc_man_file) == expected
# Example #21
# The head of this decorator was cut off, leaving two dangling tuple lines.
# It is rebuilt here with the same three cases as the identical
# ``test_no_nulls`` parametrization elsewhere in this file.
@pytest.mark.parametrize("expression,expected",
                         [(Expressions.is_null("all_nulls"), True),
                          (Expressions.is_null("some_nulls"), True),
                          (Expressions.is_null("no_nulls"), False)])
def test_no_nulls(inc_man_spec, inc_man_file, expression, expected):
    """Check ``is_null`` evaluation against the manifest for each listed column."""
    assert InclusiveManifestEvaluator(
        inc_man_spec, expression).eval(inc_man_file) == expected


def test_missing_column(inc_man_spec, inc_man_file):
    """A predicate on the column "missing" must raise ValidationException."""
    with pytest.raises(ValidationException):
        unknown_column = Expressions.less_than("missing", 5)
        evaluator = InclusiveManifestEvaluator(inc_man_spec, unknown_column)
        evaluator.eval(inc_man_file)


# Each predicate appears exactly once; the list previously contained
# ``less_than_or_equal("id", 30)`` twice, which only re-ran the same case.
@pytest.mark.parametrize("expression", [
    Expressions.less_than("id", 5),
    Expressions.less_than_or_equal("id", 30),
    Expressions.equal("id", 70),
    Expressions.greater_than("id", 78),
    Expressions.greater_than_or_equal("id", 90),
    Expressions.not_equal("id", 101),
    Expressions.is_null("id"),
    Expressions.not_null("id")
])
def test_missing_stats(inc_man_spec, inc_man_file_ns, expression):
    """Every listed predicate on ``id`` must evaluate truthy for ``inc_man_file_ns``."""
    assert InclusiveManifestEvaluator(inc_man_spec,
                                      expression).eval(inc_man_file_ns)


@pytest.mark.parametrize("expression, expected",