コード例 #1
0
def test_missing_reference():
    """Binding must fail when the expression references an unknown field.

    The test expects ``Binder.bind`` to raise ``ValidationException`` with a
    message naming the unresolved field ``t``.
    """
    expr = Expressions.and_(Expressions.equal("t", 5),
                            Expressions.equal("x", 7))
    try:
        Binder.bind(STRUCT, expr)
    except ice_ex.ValidationException as e:
        assert "Cannot find field 't' in struct" in "{}".format(e)
    else:
        # Without this branch the test would pass silently if bind() ever
        # stopped raising for the unknown field.
        raise AssertionError(
            "Binder.bind did not raise ValidationException for field 't'")
コード例 #2
0
def test_ternary_condition():
    """Three AND-ed equalities parse into a right-nested ``and_`` tree."""
    first = Expressions.equal("col_a", 1)
    remainder = Expressions.and_(Expressions.equal("col_b", 2),
                                 Expressions.equal("col_c", 3))
    expected = Expressions.and_(first, remainder)

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and col_b=2 and col_c=3")
    assert expected == parsed
コード例 #3
0
def test_precedence_explicit():
    """A parenthesised OR stays grouped under the surrounding AND."""
    col_a_is_1 = Expressions.equal("col_a", 1)
    b_or_c = Expressions.or_(Expressions.equal("col_b", 2),
                             Expressions.equal("col_c", 3))
    expected = Expressions.and_(col_a_is_1, b_or_c)

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and (col_b=2 or col_c=3)")
    assert expected == parsed
コード例 #4
0
def test_precedence_opposite_order():
    """Without parentheses the parser groups ``a and b`` before the OR."""
    a_and_b = Expressions.and_(Expressions.equal("col_a", 1),
                               Expressions.equal("col_b", 2))
    expected = Expressions.or_(a_and_b, Expressions.equal("col_c", 3))

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and col_b=2 or col_c=3")
    assert expected == parsed
コード例 #5
0
def test_complex_expansion():
    """A parenthesised three-way AND OR-ed with ``is null`` parses as expected."""
    a_is_1 = Expressions.equal("a", 1)
    b_and_not_c = Expressions.and_(Expressions.equal("b", 2),
                                   Expressions.not_equal("c", 3))
    conjunction = Expressions.and_(a_is_1, b_and_not_c)
    expected = Expressions.or_(conjunction, Expressions.is_null("d"))

    parsed = Expressions.convert_string_to_expr(
        "(a=1 and b=2 and c<>3) or d is null")
    assert expected == parsed
コード例 #6
0
def test_complex_expr():
    """``(a > 1 and b == "US") or c == True`` maps to the matching dataset filter."""
    a_and_b = Expressions.and_(Expressions.greater_than('a', 1),
                               Expressions.equal("b", "US"))
    iceberg_expr = Expressions.or_(a_and_b, Expressions.equal("c", True))

    # Identity mapping: every column keeps its own name.
    column_map = {name: name for name in ('a', 'b', 'c')}
    actual_filter = get_dataset_filter(iceberg_expr, column_map)

    a_and_b_filter = (ds.field("a") > 1) & (ds.field("b") == "US")
    expected_filter = a_and_b_filter | (ds.field("c") == True)  # noqa: E712
    assert expected_filter.equals(actual_filter)
コード例 #7
0
def test_case_sensitive_int_not_eq_rewritten(inc_man_spec, inc_man_file, val,
                                             expected):
    """Case-sensitive evaluation of a predicate on ``ID`` must raise.

    The test passes only if building or evaluating the evaluator raises
    ``ValidationException``. ``expected`` is intentionally unused; it stays
    in the signature because the parametrization (not visible here)
    presumably still supplies it.
    """
    # The former ``assert ... == expected`` wrapper was dropped: when the
    # exception is raised the comparison never runs, so it only obscured
    # what this test actually checks.
    with pytest.raises(ValidationException):
        InclusiveManifestEvaluator(
            inc_man_spec,
            Expressions.not_(Expressions.equal("ID", val)),
            case_sensitive=True).eval(inc_man_file)
コード例 #8
0
def test_multiple_references(assert_all_bound):
    """An expression referencing ``x``, ``y`` and ``z`` binds completely."""
    x_and_y = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    unbound = Expressions.or_(x_and_y, Expressions.greater_than("z", -100))

    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("Multiple references", bound)
コード例 #9
0
def test_precedence_with_between():
    """``between 1 and 2`` becomes a >=/<= pair grouped before the OR."""
    lower_bound = Expressions.greater_than_or_equal("col_a", 1)
    upper_bound = Expressions.less_than_or_equal("col_a", 2)
    in_range = Expressions.and_(lower_bound, upper_bound)
    expected = Expressions.or_(in_range, Expressions.equal("col_c", 3))

    parsed = Expressions.convert_string_to_expr(
        "col_a between 1 and 2 or col_c=3")
    assert expected == parsed
コード例 #10
0
def test_integer_eq(schema, file):
    """Check inclusive evaluation of ``id == value`` for several literals.

    5, 29, 80 and 85 are expected to evaluate falsy; 30, 75 and 79 are
    expected to evaluate truthy.
    """
    def evaluate(value):
        # Build a fresh evaluator per literal, as each assertion requires.
        predicate = Expressions.equal("id", value)
        return InclusiveMetricsEvaluator(schema, predicate).eval(file)

    for rejected in (5, 29, 80, 85):
        assert not evaluate(rejected)
    for accepted in (30, 75, 79):
        assert evaluate(accepted)
コード例 #11
0
def test_not_eq_rewritten(strict_schema, strict_file):
    """Check strict evaluation of ``not (id == value)`` for several literals."""
    # (literal, whether the strict evaluator is expected to return truthy)
    cases = (
        (5, True),
        (29, True),
        (30, False),
        (75, False),
        (79, False),
        (80, True),
        (85, True),
    )
    for literal, should_hold in cases:
        negated = Expressions.not_(Expressions.equal("id", literal))
        outcome = StrictMetricsEvaluator(strict_schema, negated).eval(strict_file)
        if should_hold:
            assert outcome
        else:
            assert not outcome
コード例 #12
0
def test_compound_filter(primitive_type_test_file):
    """Read with an AND filter and compare against a one-row Arrow table.

    The reader is given ``string_col == "us" and int_col == 1``; the table
    it returns must equal a hand-built table with a single row.
    """
    expected_schema = Schema([
        NestedField.required(1, "int_col", IntegerType.get()),
        NestedField.optional(2, "bigint_col", LongType.get()),
        NestedField.optional(4, "float_col", FloatType.get()),
        NestedField.optional(5, "dbl_col", DoubleType.get()),
        NestedField.optional(3, "string_col", StringType.get())
    ])

    file_system = get_fs(primitive_type_test_file, conf={})
    input_file = FileSystemInputFile(file_system, primitive_type_test_file, {})
    row_filter = Expressions.and_(Expressions.equal("string_col", "us"),
                                  Expressions.equal("int_col", 1))
    reader = ParquetReader(input_file, expected_schema, {}, row_filter, True)

    # One entry per column: (name, arrow type, nullable, values).
    column_specs = [
        ("int_col", pa.int32(), False, [1]),
        ("bigint_col", pa.int64(), True, [1]),
        ("float_col", pa.float32(), True, [1.0]),
        ("dbl_col", pa.float64(), True, [1.0]),
        ("string_col", pa.string(), True, ['us']),
    ]
    arrays = [pa.array(values, type=arrow_type)
              for _, arrow_type, _, values in column_specs]
    arrow_schema = pa.schema([
        pa.field(name, arrow_type, nullable=nullable)
        for name, arrow_type, nullable, _ in column_specs
    ])
    source_table = pa.table(arrays, schema=arrow_schema)

    target_table = reader.read()
    assert source_table == target_table
コード例 #13
0
def test_not(assert_all_bound, assert_and_unwrap):
    """Binding ``not (x == 7)`` yields a ``Not`` whose child references x."""
    bound = Binder.bind(STRUCT, Expressions.not_(Expressions.equal("x", 7)))
    assert_all_bound("Not", bound)

    negation = assert_and_unwrap(bound, Not)
    predicate = assert_and_unwrap(negation.child, None)

    # x is expected to resolve to field id 0
    assert 0 == predicate.ref.field_id
コード例 #14
0
def test_missing_stats(strict_schema, missing_stats):
    """Every predicate on ``no_stats`` must evaluate falsy for ``missing_stats``."""
    column = "no_stats"
    # Predicates that compare the column against a literal.
    with_literal = (
        (Expressions.less_than, 5),
        (Expressions.less_than_or_equal, 30),
        (Expressions.equal, 70),
        (Expressions.greater_than, 78),
        (Expressions.greater_than_or_equal, 90),
        (Expressions.not_equal, 101),
    )
    exprs = [build(column, literal) for build, literal in with_literal]
    exprs.append(Expressions.is_null(column))
    exprs.append(Expressions.not_null(column))

    for expr in exprs:
        evaluator = StrictMetricsEvaluator(strict_schema, expr)
        assert not evaluator.eval(missing_stats)
コード例 #15
0
def test_zero_record_file(strict_schema, empty):
    """Every predicate on ``no_stats`` must evaluate truthy for the ``empty`` file."""
    column = "no_stats"
    # Predicates that compare the column against a literal.
    with_literal = (
        (Expressions.less_than, 5),
        (Expressions.less_than_or_equal, 30),
        (Expressions.equal, 70),
        (Expressions.greater_than, 78),
        (Expressions.greater_than_or_equal, 90),
        (Expressions.not_equal, 101),
    )
    exprs = [build(column, literal) for build, literal in with_literal]
    exprs.append(Expressions.is_null(column))
    exprs.append(Expressions.not_null(column))

    for expr in exprs:
        evaluator = StrictMetricsEvaluator(strict_schema, expr)
        assert evaluator.eval(empty)
コード例 #16
0
def test_integer_eq(strict_schema, strict_file):
    """Check strict evaluation of equality predicates.

    ``id == v`` is expected to be falsy for every listed literal, while
    ``always_5 == 5`` is expected to be truthy.
    """
    def strictly_matches(column, literal):
        predicate = Expressions.equal(column, literal)
        return StrictMetricsEvaluator(strict_schema, predicate).eval(strict_file)

    for literal in (5, 30, 75, 79, 80):
        assert not strictly_matches("id", literal)
    assert strictly_matches("always_5", 5)
コード例 #17
0
def test_and(assert_all_bound, assert_and_unwrap):
    """Binding ``x == 7 and y < 100`` yields an ``And`` with both sides bound."""
    unbound = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("And", bound)

    conjunction = assert_and_unwrap(bound, And)

    # x is expected to resolve to field id 0
    lhs = assert_and_unwrap(conjunction.left, None)
    assert 0 == lhs.ref.field_id

    # y is expected to resolve to field id 1
    rhs = assert_and_unwrap(conjunction.right, None)
    assert 1 == rhs.ref.field_id
コード例 #18
0
def test_case_sensitive_reference():
    """Case-sensitive binding must reject the upper-case reference ``X``."""
    # Built outside the ``raises`` block so that only the bind call itself
    # can satisfy the expected exception.
    expr = Expressions.not_(Expressions.equal("X", 7))
    with raises(ice_ex.ValidationException):
        Binder.bind(STRUCT, expr, case_sensitive=True)
コード例 #19
0
def test_case_insensitive_reference(assert_all_bound):
    """With ``case_sensitive=False`` the upper-case reference ``X`` binds."""
    unbound = Expressions.not_(Expressions.equal("X", 7))
    bound = Binder.bind(STRUCT, unbound, case_sensitive=False)
    assert_all_bound("Single reference", bound)
コード例 #20
0
def test_single_reference(assert_all_bound):
    """An expression with a single reference to ``x`` binds completely."""
    unbound = Expressions.not_(Expressions.equal("x", 7))
    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("Single reference", bound)
コード例 #21
0
def test_bound_expression_fails():
    """Binding an already-bound expression must raise ``RuntimeError``."""
    expr = Expressions.not_(Expressions.equal("x", 7))
    # The first bind is expected to succeed, so it stays outside the
    # ``raises`` block; otherwise a RuntimeError from it would wrongly
    # satisfy the test.
    bound_expr = Binder.bind(STRUCT, expr)
    with raises(RuntimeError):
        Binder.bind(STRUCT, bound_expr)
コード例 #22
0
def test_compound_not_equal():
    """``not (col_a = 7)`` parses into ``not_`` wrapping an equality."""
    inner = Expressions.equal("col_a", 7)
    expected = Expressions.not_(inner)
    parsed = Expressions.convert_string_to_expr("not (col_a = 7)")
    assert expected == parsed
コード例 #23
0
def test_and():
    """Two equalities joined by ``and`` parse into a single ``and_``."""
    lhs = Expressions.equal("col_a", 1)
    rhs = Expressions.equal("col_b", 2)
    expected = Expressions.and_(lhs, rhs)
    parsed = Expressions.convert_string_to_expr("col_a=1 and col_b=2")
    assert expected == parsed
コード例 #24
0
def test_equal_alt_syntax():
    """The ``==`` spelling parses to the same equality as ``equal``."""
    expected = Expressions.equal("col_a", 1)
    parsed = Expressions.convert_string_to_expr("col_a==1")
    assert expected == parsed
コード例 #25
0
def test_int_not_eq_rewritten(inc_man_spec, inc_man_file, val, expected):
    """Evaluating ``not (id == val)`` on the manifest must equal ``expected``."""
    negated = Expressions.not_(Expressions.equal("id", val))
    evaluator = InclusiveManifestEvaluator(inc_man_spec, negated)
    assert evaluator.eval(inc_man_file) == expected
コード例 #26
0
def test_case_insensitive_int_not_eq_rewritten(inc_man_spec, inc_man_file, val, expected):
    """Case-insensitive ``not (ID == val)`` on the manifest must equal ``expected``."""
    negated = Expressions.not_(Expressions.equal("ID", val))
    evaluator = InclusiveManifestEvaluator(inc_man_spec,
                                           negated,
                                           case_sensitive=False)
    assert evaluator.eval(inc_man_file) == expected
コード例 #27
0
def test_str_equal_double_quotes():
    """A double-quoted literal parses as the string ``"123"``."""
    expected = Expressions.equal("col_a", "123")
    parsed = Expressions.convert_string_to_expr("col_a=\"123\"")
    assert expected == parsed
コード例 #28
0
                    Operation.LT, Operation.LT_EQ, Operation.GT,
                    Operation.GT_EQ, Operation.EQ, Operation.NOT_EQ
                ])
def op(request):
    """Yield ``request.param``, the value supplied for the current run.

    NOTE(review): the decorator above is only partially visible here; its
    visible tail lists ``Operation`` members, so this presumably yields one
    ``Operation`` per parametrized run -- confirm against the full file.
    """
    yield request.param


@pytest.fixture(
    scope="session",
    params=[
        Expressions.always_false(),
        Expressions.always_true(),
        Expressions.less_than("x", 5),
        Expressions.less_than_or_equal("y", -3),
        Expressions.greater_than("z", 0),
        Expressions.greater_than_or_equal("t", 129),
        Expressions.equal("col", "data"),
        Expressions.not_equal("col", "abc"),
        Expressions.not_null("maybeNull"),
        Expressions.is_null("maybeNull2"),
        Expressions.not_(Expressions.greater_than("a", 10)),
        Expressions.and_(
            Expressions.greater_than_or_equal("a", 0),
            Expressions.less_than("a", 3),
        ),
        Expressions.or_(
            Expressions.less_than("a", 0),
            Expressions.greater_than("a", 10),
        ),
        Expressions.equal("a", 5).bind(exp_schema.as_struct()),
    ],
)
def expression(request):
    """Session-scoped fixture yielding one sample expression per run.

    The samples cover the constant expressions, each comparison predicate,
    the null checks, ``not_``/``and_``/``or_`` combinations, and one
    equality predicate bound against ``exp_schema``.
    """
    yield request.param


@pytest.fixture(scope="session",
コード例 #29
0
def test_or():
    expected_expr = Expressions.or_(Expressions.equal("col_a", 1),
                                    Expressions.equal("col_b", 2))
    conv_expr = Expressions.convert_string_to_expr("col_a=1 or col_b=2")
    assert expected_expr == conv_expr
コード例 #30
0
import pytest


@pytest.mark.parametrize(
    "expr, dataset_filter, column_map",
    [
        (Expressions.greater_than('a', 1), ds.field('a') > 1, {'a': 'a'}),
        (Expressions.greater_than_or_equal('a', 1), ds.field('a') >= 1, {'a': 'a'}),
        (Expressions.less_than('a', 1), ds.field('a') < 1, {'a': 'a'}),
        (Expressions.less_than_or_equal('a', 1), ds.field('a') <= 1, {'a': 'a'}),
        (Expressions.equal('a', 1), ds.field('a') == 1, {'a': 'a'}),
        (Expressions.not_equal('a', 1), ds.field('a') != 1, {'a': 'a'}),
        (Expressions.not_null('a'), ds.field('a').is_valid(), {'a': 'a'}),
        (Expressions.is_null('a'), ~ds.field('a').is_valid(), {'a': 'a'}),
    ])
def test_simple(expr, dataset_filter, column_map):
    """Each single-predicate expression translates to its expected dataset filter."""
    actual_filter = get_dataset_filter(expr, column_map)
    assert dataset_filter.equals(actual_filter)


def test_not_conversion():
    expr = Expressions.not_(Expressions.greater_than('a', 1))