def test_missing_reference():
    """Binding an expression that references "t" must fail validation.

    The test expects ``Binder.bind`` to raise ``ValidationException`` with a
    message naming the field that could not be found.
    """
    expr = Expressions.and_(Expressions.equal("t", 5),
                            Expressions.equal("x", 7))
    try:
        Binder.bind(STRUCT, expr)
    except ice_ex.ValidationException as e:
        assert "Cannot find field 't' in struct" in "{}".format(e)
    else:
        # Without this branch the test would pass silently when bind
        # succeeds, hiding a regression in reference validation.
        raise AssertionError(
            "Expected ValidationException for missing field 't'")
Ejemplo n.º 2
0
def test_ternary_condition():
    """Three AND-ed predicates must parse into a right-nested ``and_``."""
    col_b_and_c = Expressions.and_(Expressions.equal("col_b", 2),
                                   Expressions.equal("col_c", 3))
    wanted = Expressions.and_(Expressions.equal("col_a", 1), col_b_and_c)

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and col_b=2 and col_c=3")
    assert wanted == parsed
Ejemplo n.º 3
0
def test_precedence_explicit():
    """A parenthesised OR must stay grouped under the enclosing AND."""
    col_b_or_c = Expressions.or_(Expressions.equal("col_b", 2),
                                 Expressions.equal("col_c", 3))
    wanted = Expressions.and_(Expressions.equal("col_a", 1), col_b_or_c)

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and (col_b=2 or col_c=3)")
    assert wanted == parsed
Ejemplo n.º 4
0
def test_precedence_opposite_order():
    """``a and b or c`` must parse as ``(a and b) or c``."""
    col_a_and_b = Expressions.and_(Expressions.equal("col_a", 1),
                                   Expressions.equal("col_b", 2))
    wanted = Expressions.or_(col_a_and_b, Expressions.equal("col_c", 3))

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and col_b=2 or col_c=3")
    assert wanted == parsed
Ejemplo n.º 5
0
def test_complex_expansion():
    """A grouped three-way AND followed by ``or d is null`` parses as nested."""
    b_and_not_c = Expressions.and_(Expressions.equal("b", 2),
                                   Expressions.not_equal("c", 3))
    grouped = Expressions.and_(Expressions.equal("a", 1), b_and_not_c)
    wanted = Expressions.or_(grouped, Expressions.is_null("d"))

    parsed = Expressions.convert_string_to_expr(
        "(a=1 and b=2 and c<>3) or d is null")
    assert wanted == parsed
def test_complex_expr():
    """A nested and/or expression translates to the matching dataset filter."""
    a_and_b = Expressions.and_(Expressions.greater_than('a', 1),
                               Expressions.equal("b", "US"))
    expr = Expressions.or_(a_and_b, Expressions.equal("c", True))

    column_map = {'a': 'a', 'b': 'b', 'c': 'c'}
    translated = get_dataset_filter(expr, column_map)

    # Build the expected filter piece by piece, mirroring the expression tree.
    left_side = (ds.field("a") > 1) & (ds.field("b") == "US")
    right_side = ds.field("c") == True  # noqa: E712
    expected_filter = left_side | right_side
    assert expected_filter.equals(translated)
Ejemplo n.º 7
0
def test_case_sensitive_int_not_eq_rewritten(inc_man_spec, inc_man_file, val,
                                             expected):
    """Case-sensitive evaluation of a predicate on "ID" must fail validation.

    ``expected`` is intentionally unused: the evaluation is expected to raise
    before producing a result. The parameter is kept so the signature stays
    compatible with whatever supplies these arguments.
    """
    expr = Expressions.not_(Expressions.equal("ID", val))
    # No comparison against ``expected`` here: an assert inside the raises
    # block never runs when the call raises, so it only obscured the intent.
    with pytest.raises(ValidationException):
        InclusiveManifestEvaluator(
            inc_man_spec, expr, case_sensitive=True).eval(inc_man_file)
def test_multiple_references(assert_all_bound):
    """An expression referencing "x", "y" and "z" must bind completely."""
    x_and_y = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    expr = Expressions.or_(x_and_y, Expressions.greater_than("z", -100))

    bound = Binder.bind(STRUCT, expr)
    assert_all_bound("Multiple references", bound)
Ejemplo n.º 9
0
def test_precedence_with_between():
    """``between 1 and 2`` expands to a >=/<= pair that is then OR-ed."""
    in_range = Expressions.and_(
        Expressions.greater_than_or_equal("col_a", 1),
        Expressions.less_than_or_equal("col_a", 2))
    wanted = Expressions.or_(in_range, Expressions.equal("col_c", 3))

    parsed = Expressions.convert_string_to_expr(
        "col_a between 1 and 2 or col_c=3")
    assert wanted == parsed
def test_integer_eq(schema, file):
    """Inclusive ``id == v`` must reject 5, 29, 80, 85 and accept 30, 75, 79."""

    def might_match(value):
        # Evaluate ``id == value`` against the fixture file's metrics.
        evaluator = InclusiveMetricsEvaluator(
            schema, Expressions.equal("id", value))
        return evaluator.eval(file)

    for rejected in (5, 29, 80, 85):
        assert not might_match(rejected)
    for accepted in (30, 75, 79):
        assert might_match(accepted)
def test_not_eq_rewritten(strict_schema, strict_file):
    """Strict ``not (id == v)`` holds for 5, 29, 80, 85 but not 30, 75, 79."""

    def strictly_matches(value):
        # Evaluate ``not (id == value)`` against the fixture file's metrics.
        negated = Expressions.not_(Expressions.equal("id", value))
        return StrictMetricsEvaluator(strict_schema, negated).eval(strict_file)

    for value in (5, 29):
        assert strictly_matches(value)
    for value in (30, 75, 79):
        assert not strictly_matches(value)
    for value in (80, 85):
        assert strictly_matches(value)
Ejemplo n.º 12
0
def test_compound_filter(primitive_type_test_file):
    """Reading with a two-predicate AND filter must return the expected table.

    The filter is ``string_col == "us" and int_col == 1``; the reader output
    is compared against a hand-built one-row pyarrow table.
    """
    iceberg_fields = [
        NestedField.required(1, "int_col", IntegerType.get()),
        NestedField.optional(2, "bigint_col", LongType.get()),
        NestedField.optional(4, "float_col", FloatType.get()),
        NestedField.optional(5, "dbl_col", DoubleType.get()),
        NestedField.optional(3, "string_col", StringType.get()),
    ]
    expected_schema = Schema(iceberg_fields)

    file_system = get_fs(primitive_type_test_file, conf={})
    input_file = FileSystemInputFile(file_system, primitive_type_test_file, {})

    row_filter = Expressions.and_(Expressions.equal("string_col", "us"),
                                  Expressions.equal("int_col", 1))
    reader = ParquetReader(input_file, expected_schema, {}, row_filter, True)

    # One entry per column: (name, arrow type, nullable, single row value).
    column_specs = [
        ("int_col", pa.int32(), False, 1),
        ("bigint_col", pa.int64(), True, 1),
        ("float_col", pa.float32(), True, 1.0),
        ("dbl_col", pa.float64(), True, 1.0),
        ("string_col", pa.string(), True, 'us'),
    ]
    arrow_arrays = [pa.array([value], type=arrow_type)
                    for _, arrow_type, _, value in column_specs]
    arrow_schema = pa.schema([
        pa.field(name, arrow_type, nullable=nullable)
        for name, arrow_type, nullable, _ in column_specs
    ])
    source_table = pa.table(arrow_arrays, schema=arrow_schema)

    target_table = reader.read()
    assert source_table == target_table
Ejemplo n.º 13
0
def test_not(assert_all_bound, assert_and_unwrap):
    """Binding ``not (x == 7)`` yields a Not whose child references id 0."""
    unbound = Expressions.not_(Expressions.equal("x", 7))
    bound_expr = Binder.bind(STRUCT, unbound)
    assert_all_bound("Not", bound_expr)

    negation = assert_and_unwrap(bound_expr, Not)
    predicate = assert_and_unwrap(negation.child, None)

    # "x" is expected to resolve to field id 0
    assert 0 == predicate.ref.field_id
def test_missing_stats(strict_schema, missing_stats):
    """No predicate on "no_stats" may strictly match ``missing_stats``."""
    column = "no_stats"
    comparisons = [
        (Expressions.less_than, 5),
        (Expressions.less_than_or_equal, 30),
        (Expressions.equal, 70),
        (Expressions.greater_than, 78),
        (Expressions.greater_than_or_equal, 90),
        (Expressions.not_equal, 101),
    ]
    predicates = [build(column, literal) for build, literal in comparisons]
    predicates.append(Expressions.is_null(column))
    predicates.append(Expressions.not_null(column))

    for predicate in predicates:
        evaluator = StrictMetricsEvaluator(strict_schema, predicate)
        assert not evaluator.eval(missing_stats)
def test_zero_record_file(strict_schema, empty):
    """Every predicate on "no_stats" must strictly match the ``empty`` file."""
    column = "no_stats"
    comparisons = [
        (Expressions.less_than, 5),
        (Expressions.less_than_or_equal, 30),
        (Expressions.equal, 70),
        (Expressions.greater_than, 78),
        (Expressions.greater_than_or_equal, 90),
        (Expressions.not_equal, 101),
    ]
    predicates = [build(column, literal) for build, literal in comparisons]
    predicates.append(Expressions.is_null(column))
    predicates.append(Expressions.not_null(column))

    for predicate in predicates:
        evaluator = StrictMetricsEvaluator(strict_schema, predicate)
        assert evaluator.eval(empty)
def test_integer_eq(strict_schema, strict_file):
    """Strict equality fails for every tested "id" but holds for "always_5"."""

    def strictly_matches(column, value):
        # Evaluate ``column == value`` against the fixture file's metrics.
        predicate = Expressions.equal(column, value)
        return StrictMetricsEvaluator(strict_schema,
                                      predicate).eval(strict_file)

    for value in (5, 30, 75, 79, 80):
        assert not strictly_matches("id", value)
    assert strictly_matches("always_5", 5)
Ejemplo n.º 17
0
def test_and(assert_all_bound, assert_and_unwrap):
    """Binding ``x == 7 and y < 100`` binds both sides to their field ids."""
    unbound = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    bound_expr = Binder.bind(STRUCT, unbound)
    assert_all_bound("And", bound_expr)

    conjunction = assert_and_unwrap(bound_expr, And)

    # "x" is expected to resolve to field id 0
    lhs = assert_and_unwrap(conjunction.left, None)
    assert 0 == lhs.ref.field_id

    # "y" is expected to resolve to field id 1
    rhs = assert_and_unwrap(conjunction.right, None)
    assert 1 == rhs.ref.field_id
Ejemplo n.º 18
0
def test_case_sensitive_reference():
    """Case-sensitive binding of the upper-case name "X" must fail validation."""
    expr = Expressions.not_(Expressions.equal("X", 7))
    # Only the bind call belongs inside the raises block, so an unexpected
    # error while building the expression cannot satisfy the check.
    with raises(ice_ex.ValidationException):
        Binder.bind(STRUCT, expr, case_sensitive=True)
Ejemplo n.º 19
0
def test_case_insensitive_reference(assert_all_bound):
    """Case-insensitive binding of the upper-case name "X" must succeed."""
    unbound = Expressions.not_(Expressions.equal("X", 7))
    bound = Binder.bind(STRUCT, unbound, case_sensitive=False)
    assert_all_bound("Single reference", bound)
Ejemplo n.º 20
0
def test_single_reference(assert_all_bound):
    """An expression with a single reference to "x" must bind completely."""
    unbound = Expressions.not_(Expressions.equal("x", 7))
    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("Single reference", bound)
Ejemplo n.º 21
0
def test_bound_expression_fails():
    """Binding an already-bound expression must raise ``RuntimeError``."""
    expr = Expressions.not_(Expressions.equal("x", 7))
    # The first bind is setup and must succeed; keeping it outside the raises
    # block ensures only the re-bind can satisfy the expectation.
    bound_once = Binder.bind(STRUCT, expr)
    with raises(RuntimeError):
        Binder.bind(STRUCT, bound_once)
Ejemplo n.º 22
0
def test_compound_not_equal():
    """``not (col_a = 7)`` must parse into a ``not_`` wrapping an equality."""
    inner = Expressions.equal("col_a", 7)
    wanted = Expressions.not_(inner)
    parsed = Expressions.convert_string_to_expr("not (col_a = 7)")
    assert wanted == parsed
Ejemplo n.º 23
0
def test_and():
    """Two predicates joined by ``and`` must parse into a single ``and_``."""
    lhs = Expressions.equal("col_a", 1)
    rhs = Expressions.equal("col_b", 2)
    wanted = Expressions.and_(lhs, rhs)

    parsed = Expressions.convert_string_to_expr("col_a=1 and col_b=2")
    assert wanted == parsed
Ejemplo n.º 24
0
def test_equal_alt_syntax():
    """The ``==`` spelling must parse into the same equality predicate."""
    wanted = Expressions.equal("col_a", 1)
    parsed = Expressions.convert_string_to_expr("col_a==1")
    assert wanted == parsed
def test_int_not_eq_rewritten(inc_man_spec, inc_man_file, val, expected):
    """Inclusive evaluation of ``not (id == val)`` must equal ``expected``."""
    negated = Expressions.not_(Expressions.equal("id", val))
    evaluator = InclusiveManifestEvaluator(inc_man_spec, negated)
    outcome = evaluator.eval(inc_man_file)
    assert outcome == expected
def test_case_insensitive_int_not_eq_rewritten(inc_man_spec, inc_man_file, val, expected):
    """Case-insensitive ``not (ID == val)`` must evaluate to ``expected``."""
    negated = Expressions.not_(Expressions.equal("ID", val))
    evaluator = InclusiveManifestEvaluator(inc_man_spec,
                                           negated,
                                           case_sensitive=False)
    outcome = evaluator.eval(inc_man_file)
    assert outcome == expected
Ejemplo n.º 27
0
def test_str_equal_double_quotes():
    """A double-quoted literal must parse as a string equality."""
    wanted = Expressions.equal("col_a", "123")
    parsed = Expressions.convert_string_to_expr("col_a=\"123\"")
    assert wanted == parsed
Ejemplo n.º 28
0
                    Operation.LT, Operation.LT_EQ, Operation.GT,
                    Operation.GT_EQ, Operation.EQ, Operation.NOT_EQ
                ])
def op(request):
    """Yield the parameter carried by ``request`` for the current run."""
    current = request.param
    yield current


@pytest.fixture(
    scope="session",
    params=[
        # constant expressions
        Expressions.always_false(),
        Expressions.always_true(),
        # comparison predicates
        Expressions.less_than("x", 5),
        Expressions.less_than_or_equal("y", -3),
        Expressions.greater_than("z", 0),
        Expressions.greater_than_or_equal("t", 129),
        Expressions.equal("col", "data"),
        Expressions.not_equal("col", "abc"),
        # null checks
        Expressions.not_null("maybeNull"),
        Expressions.is_null("maybeNull2"),
        # logical combinations
        Expressions.not_(Expressions.greater_than("a", 10)),
        Expressions.and_(Expressions.greater_than_or_equal("a", 0),
                         Expressions.less_than("a", 3)),
        Expressions.or_(Expressions.less_than("a", 0),
                        Expressions.greater_than("a", 10)),
        # a predicate after calling bind against the schema's struct
        Expressions.equal("a", 5).bind(exp_schema.as_struct()),
    ])
def expression(request):
    """Yield one of the parametrized expressions per fixture instance."""
    current = request.param
    yield current


@pytest.fixture(scope="session",
Ejemplo n.º 29
0
def test_or():
    """Two predicates joined by ``or`` must parse into a single ``or_``."""
    lhs = Expressions.equal("col_a", 1)
    rhs = Expressions.equal("col_b", 2)
    wanted = Expressions.or_(lhs, rhs)

    parsed = Expressions.convert_string_to_expr("col_a=1 or col_b=2")
    assert wanted == parsed
Ejemplo n.º 30
0
import pytest


@pytest.mark.parametrize(
    "expr, dataset_filter, column_map",
    [
        # One case per simple predicate: (expression, expected filter, map).
        (Expressions.greater_than('a', 1),
         ds.field('a') > 1,
         {'a': 'a'}),
        (Expressions.greater_than_or_equal('a', 1),
         ds.field('a') >= 1,
         {'a': 'a'}),
        (Expressions.less_than('a', 1),
         ds.field('a') < 1,
         {'a': 'a'}),
        (Expressions.less_than_or_equal('a', 1),
         ds.field('a') <= 1,
         {'a': 'a'}),
        (Expressions.equal('a', 1),
         ds.field('a') == 1,
         {'a': 'a'}),
        (Expressions.not_equal('a', 1),
         ds.field('a') != 1,
         {'a': 'a'}),
        (Expressions.not_null('a'),
         ds.field('a').is_valid(),
         {'a': 'a'}),
        (Expressions.is_null('a'),
         ~ds.field('a').is_valid(),
         {'a': 'a'}),
    ])
def test_simple(expr, dataset_filter, column_map):
    """Each simple predicate must translate to its expected dataset filter."""
    actual_filter = get_dataset_filter(expr, column_map)
    assert dataset_filter.equals(actual_filter)


def test_not_conversion():
    expr = Expressions.not_(Expressions.greater_than('a', 1))