def test_ternary_condition():
    """Check that a three-term ``and`` string parses into right-nested ``and_`` nodes."""
    # Expected tree: col_a=1 AND (col_b=2 AND col_c=3).
    head = Expressions.equal("col_a", 1)
    tail = Expressions.and_(Expressions.equal("col_b", 2),
                            Expressions.equal("col_c", 3))
    wanted = Expressions.and_(head, tail)

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and col_b=2 and col_c=3")
    assert wanted == parsed
def test_and(schema, file):
    """Evaluate two ``and_`` predicates on ``id`` with InclusiveMetricsEvaluator."""
    # id < 5 AND id >= 0: the evaluator is expected to return a falsy result.
    rejected = Expressions.and_(Expressions.less_than("id", 5),
                                Expressions.greater_than_or_equal("id", 0))
    assert not InclusiveMetricsEvaluator(schema, rejected).eval(file)

    # id > 5 AND id <= 30: the evaluator is expected to return a truthy result.
    accepted = Expressions.and_(Expressions.greater_than("id", 5),
                                Expressions.less_than_or_equal("id", 30))
    assert InclusiveMetricsEvaluator(schema, accepted).eval(file)
def test_complex_expansion():
    """Check parsing of a parenthesised ``and`` chain combined with ``or ... is null``."""
    # Left operand of the OR: a=1 AND (b=2 AND c<>3).
    a_is_one = Expressions.equal("a", 1)
    b_and_not_c = Expressions.and_(Expressions.equal("b", 2),
                                   Expressions.not_equal("c", 3))
    conjunction = Expressions.and_(a_is_one, b_and_not_c)
    wanted = Expressions.or_(conjunction, Expressions.is_null("d"))

    parsed = Expressions.convert_string_to_expr(
        "(a=1 and b=2 and c<>3) or d is null")
    assert wanted == parsed
def test_and(strict_schema, strict_file):
    """Evaluate three ``and_`` predicates on ``id`` with StrictMetricsEvaluator."""

    def strict_eval(expr):
        # Build a fresh evaluator per expression, as each assertion needs its own.
        return StrictMetricsEvaluator(strict_schema, expr).eval(strict_file)

    # id > 5 AND id <= 30: expected falsy.
    assert not strict_eval(
        Expressions.and_(Expressions.greater_than("id", 5),
                         Expressions.less_than_or_equal("id", 30)))
    # id < 5 AND id >= 0: expected falsy.
    assert not strict_eval(
        Expressions.and_(Expressions.less_than("id", 5),
                         Expressions.greater_than_or_equal("id", 0)))
    # id < 85 AND id >= 0: expected truthy.
    assert strict_eval(
        Expressions.and_(Expressions.less_than("id", 85),
                         Expressions.greater_than_or_equal("id", 0)))
def test_missing_reference():
    """Binding an expression that references field ``t`` must raise.

    The test expects ``Binder.bind`` to raise ``ice_ex.ValidationException``
    with a message naming the missing field. If no exception is raised the
    test now fails explicitly instead of passing silently.
    """
    expr = Expressions.and_(Expressions.equal("t", 5),
                            Expressions.equal("x", 7))
    try:
        Binder.bind(STRUCT, expr)
    except ice_ex.ValidationException as e:
        assert "Cannot find field 't' in struct" in "{}".format(e)
    else:
        # Without this branch a successful bind would make the test pass
        # without ever checking anything.
        raise AssertionError(
            "Expected ValidationException when binding missing field 't'")
def test_multiple_references(assert_all_bound):
    """Bind an expression referencing ``x``, ``y`` and ``z`` and check it is fully bound."""
    # (x = 7 AND y < 100) OR z > -100
    x_and_y = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    unbound = Expressions.or_(x_and_y, Expressions.greater_than("z", -100))

    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("Multiple references", bound)
def test_precedence_explicit():
    """Check that explicit parentheses group the ``or`` beneath the ``and``."""
    # Expected tree: col_a=1 AND (col_b=2 OR col_c=3).
    left = Expressions.equal("col_a", 1)
    grouped_or = Expressions.or_(Expressions.equal("col_b", 2),
                                 Expressions.equal("col_c", 3))
    wanted = Expressions.and_(left, grouped_or)

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and (col_b=2 or col_c=3)")
    assert wanted == parsed
def test_precedence_with_between():
    """Check that ``between ... and ...`` followed by ``or`` parses as (range) OR (equality)."""
    # Expected: the between clause becomes col_a >= 1 AND col_a <= 2.
    in_range = Expressions.and_(Expressions.greater_than_or_equal("col_a", 1),
                                Expressions.less_than_or_equal("col_a", 2))
    wanted = Expressions.or_(in_range, Expressions.equal("col_c", 3))

    parsed = Expressions.convert_string_to_expr(
        "col_a between 1 and 2 or col_c=3")
    assert wanted == parsed
def test_precedence_opposite_order():
    """Check that ``a and b or c`` without parentheses parses as ``(a and b) or c``."""
    a_and_b = Expressions.and_(Expressions.equal("col_a", 1),
                               Expressions.equal("col_b", 2))
    wanted = Expressions.or_(a_and_b, Expressions.equal("col_c", 3))

    parsed = Expressions.convert_string_to_expr(
        "col_a=1 and col_b=2 or col_c=3")
    assert wanted == parsed
def test_complex_expr():
    """Translate ``(a > 1 and b == "US") or c == True`` and compare with a hand-built filter."""
    a_and_b = Expressions.and_(Expressions.greater_than('a', 1),
                               Expressions.equal("b", "US"))
    expr = Expressions.or_(a_and_b, Expressions.equal("c", True))

    # Each column name maps to itself.
    column_map = {name: name for name in ('a', 'b', 'c')}
    translated_dataset_filter = get_dataset_filter(expr, column_map)

    # Hand-built equivalent of the expression using dataset field expressions.
    left_side = (ds.field("a") > 1) & (ds.field("b") == "US")
    right_side = ds.field("c") == True  # noqa: E712
    dataset_filter = left_side | right_side
    assert dataset_filter.equals(translated_dataset_filter)
def test_and(assert_all_bound, assert_and_unwrap):
    """Bind ``x = 7 and y < 100`` and check each side resolves to the expected field id."""
    unbound = Expressions.and_(Expressions.equal("x", 7),
                               Expressions.less_than("y", 100))
    bound = Binder.bind(STRUCT, unbound)
    assert_all_bound("And", bound)

    conjunction = assert_and_unwrap(bound, And)

    # should bind x correctly
    lhs = assert_and_unwrap(conjunction.left, None)
    assert 0 == lhs.ref.field_id
    # should bind y correctly
    rhs = assert_and_unwrap(conjunction.right, None)
    assert 1 == rhs.ref.field_id
def test_basic_simplification(assert_and_unwrap):
    """Check that binding simplifies ``or_``, ``and_`` and doubly negated expressions."""
    # The or expression should simplify to always_true.
    or_expr = Expressions.or_(Expressions.less_than("y", 100),
                              Expressions.greater_than("z", -9999999999))
    assert Expressions.always_true() == Binder.bind(STRUCT, or_expr)

    # The and expression should simplify to always_false.
    and_expr = Expressions.and_(Expressions.less_than("y", 100),
                                Expressions.less_than("z", -9999999999))
    assert Expressions.always_false() == Binder.bind(STRUCT, and_expr)

    # not(not(y < 100)) is bound, unwrapped, and its reference checked
    # against field id 1.
    double_negation = Expressions.not_(
        Expressions.not_(Expressions.less_than("y", 100)))
    pred = assert_and_unwrap(Binder.bind(STRUCT, double_negation), None)
    assert 1 == pred.ref.field_id
Exemple #13
0
def test_compound_filter(primitive_type_test_file):
    """Read the test file with a two-predicate ``and_`` filter and compare with one expected row."""
    expected_schema = Schema([
        NestedField.required(1, "int_col", IntegerType.get()),
        NestedField.optional(2, "bigint_col", LongType.get()),
        NestedField.optional(4, "float_col", FloatType.get()),
        NestedField.optional(5, "dbl_col", DoubleType.get()),
        NestedField.optional(3, "string_col", StringType.get())
    ])

    fs = get_fs(primitive_type_test_file, conf={})
    input_file = FileSystemInputFile(fs, primitive_type_test_file, {})
    # Filter: string_col = "us" AND int_col = 1.
    row_filter = Expressions.and_(Expressions.equal("string_col", "us"),
                                  Expressions.equal("int_col", 1))
    reader = ParquetReader(input_file, expected_schema, {}, row_filter, True)

    # One entry per expected column: (name, arrow type, nullable, single value).
    columns = [
        ("int_col", pa.int32(), False, 1),
        ("bigint_col", pa.int64(), True, 1),
        ("float_col", pa.float32(), True, 1.0),
        ("dbl_col", pa.float64(), True, 1.0),
        ("string_col", pa.string(), True, 'us'),
    ]
    arrays = [pa.array([value], type=arrow_type)
              for _, arrow_type, _, value in columns]
    arrow_schema = pa.schema([pa.field(name, arrow_type, nullable=nullable)
                              for name, arrow_type, nullable, _ in columns])
    source_table = pa.table(arrays, schema=arrow_schema)

    target_table = reader.read()
    assert source_table == target_table
Exemple #14
0
    def join_filters(expressions):
        """AND together every item of ``expressions``, starting from ``always_true()``.

        With an empty iterable the initial ``always_true()`` value is returned.
        """
        combined = Expressions.always_true()
        for current in expressions:
            combined = Expressions.and_(combined, current)
        return combined
 def filter_rows(self, expr):
     """Return a new FilteredManifest further restricted by the row expression ``expr``.

     ``expr`` is projected through the reader's spec with ``inclusive`` and
     AND-ed onto the current partition filter; ``expr`` itself is AND-ed onto
     the current row filter.
     """
     partition_expr = inclusive(self.reader.spec).project(expr)
     new_part_filter = Expressions.and_(self.part_filter, partition_expr)
     new_row_filter = Expressions.and_(self.row_filter, expr)
     return FilteredManifest(self.reader, new_part_filter, new_row_filter,
                             self.columns, self.case_sensitive)
 def filter_partitions(self, expr):
     """Return a new FilteredManifest with ``expr`` AND-ed onto the partition filter.

     The row filter, columns and case sensitivity are carried over unchanged.
     """
     new_part_filter = Expressions.and_(self.part_filter, expr)
     return FilteredManifest(self.reader, new_part_filter, self.row_filter,
                             self.columns, self.case_sensitive)
def test_and(inc_man_spec, inc_man_file, expr1, expr2, expected):
    """Check that evaluating ``expr1 AND expr2`` against the manifest file equals ``expected``."""
    conjunction = Expressions.and_(expr1, expr2)
    evaluator = InclusiveManifestEvaluator(inc_man_spec, conjunction)
    assert evaluator.eval(inc_man_file) == expected

# Session-scoped parametrized fixture: each entry in ``params`` is handed to
# dependent tests in turn. The list is built when the decorator is evaluated,
# so ``exp_schema`` must already be defined at that point.
@pytest.fixture(scope="session",
                params=[
                    # Constant expressions.
                    Expressions.always_false(),
                    Expressions.always_true(),
                    # Comparison predicates.
                    Expressions.less_than("x", 5),
                    Expressions.less_than_or_equal("y", -3),
                    Expressions.greater_than("z", 0),
                    Expressions.greater_than_or_equal("t", 129),
                    Expressions.equal("col", "data"),
                    Expressions.not_equal("col", "abc"),
                    # Null checks.
                    Expressions.not_null("maybeNull"),
                    Expressions.is_null("maybeNull2"),
                    # Compound expressions (not / and / or).
                    Expressions.not_(Expressions.greater_than("a", 10)),
                    Expressions.and_(Expressions.greater_than_or_equal("a", 0),
                                     Expressions.less_than("a", 3)),
                    Expressions.or_(Expressions.less_than("a", 0),
                                    Expressions.greater_than("a", 10)),
                    # A predicate bound against exp_schema's struct.
                    Expressions.equal("a", 5).bind(exp_schema.as_struct())
                ])
def expression(request):
    """Yield the expression selected for the current parametrization (``request.param``)."""
    yield request.param


@pytest.fixture(scope="session",
                params=[
                    Expressions.less_than("no_stats", 5),
                    Expressions.less_than_or_equal("no_stats", 30),
                    Expressions.equal("no_stats", 70),
                    Expressions.greater_than("no_stats", 78),
                    Expressions.greater_than_or_equal("no_stats", 90),
def test_between():
    """Check that ``between 1 and 2`` parses into a ``>=`` / ``<=`` pair joined by ``and_``."""
    lower_bound = Expressions.greater_than_or_equal("col_a", 1)
    upper_bound = Expressions.less_than_or_equal("col_a", 2)
    wanted = Expressions.and_(lower_bound, upper_bound)

    parsed = Expressions.convert_string_to_expr("col_a between 1 and 2")
    assert wanted == parsed
def test_and():
    """Check that a two-term ``and`` string parses into a single ``and_`` of two equalities."""
    first = Expressions.equal("col_a", 1)
    second = Expressions.equal("col_b", 2)
    wanted = Expressions.and_(first, second)

    parsed = Expressions.convert_string_to_expr("col_a=1 and col_b=2")
    assert wanted == parsed
 def filter(self, expr):
     """Return a refined scan whose row filter is the current row filter AND ``expr``.

     All other scan settings are passed through from this instance unchanged.
     """
     narrowed_filter = Expressions.and_(self._row_filter, expr)
     return self.new_refined_scan(
         self.ops,
         self.table,
         self._schema,
         snapshot_id=self.snapshot_id,
         row_filter=narrowed_filter,
         case_sensitive=self._case_sensitive,
         selected_columns=self.selected_columns,
         options=self.options,
         minused_cols=self.minused_cols)