Ejemplo n.º 1
0
def check_predicates(predicates):
    """
    Check if predicates are well-formed.

    ``predicates`` is expected to be a non-empty sequence of conjunctions,
    where every conjunction is a non-empty ``list`` of 3-tuples (clauses).
    Only the third element of each clause (the value) is inspected further.

    Parameters
    ----------
    predicates
        The predicates to validate, or ``None``. ``None`` is accepted without
        any further checks.

    Raises
    ------
    ValueError
        If ``predicates`` is empty, if a conjunction is not a ``list`` or is
        empty, or if a clause is not a tuple of exactly three elements.
    NotImplementedError
        If ``_check_contains_null`` reports a hit for a clause value or, when
        the value is a list, for any of its elements.
    """
    if predicates is None:
        return

    if len(predicates) == 0:
        raise ValueError("Empty predicates")

    for conjunction_idx, conjunction in enumerate(predicates):
        if not isinstance(conjunction, list):
            raise ValueError(
                f"Invalid predicates: Conjunction {conjunction_idx} should be a "
                f"list of 3-tuples, got object of type {type(conjunction)} instead."
            )
        if len(conjunction) == 0:
            raise ValueError(
                f"Invalid predicates: Conjunction {conjunction_idx} is empty")
        for clause_idx, clause in enumerate(conjunction):
            # The negation must cover BOTH conditions. Written as
            # ``not isinstance(...) and len(...) == 3`` it only rejected
            # non-tuples of length three, let wrong-length tuples through to
            # the unpacking below, and called ``len`` on unsized objects.
            if not (isinstance(clause, tuple) and len(clause) == 3):
                raise ValueError(
                    f"Invalid predicates: Clause {clause_idx} in conjunction {conjunction_idx} "
                    f"should be a 3-tuple, got object of type {type(clause)} instead"
                )
            _, _, val = clause
            # Explicit parentheses: list values are checked element-wise
            # first, then the value itself is checked as a whole.
            if (
                (isinstance(val, list)
                 and any(_check_contains_null(v) for v in val))
                or _check_contains_null(val)
            ):
                raise NotImplementedError(
                    "Null-terminated binary strings are not supported as predicate values."
                )
Ejemplo n.º 2
0
def check_predicates(predicates):
    """
    Validate the overall shape of ``predicates``.

    ``None`` is accepted as-is. Otherwise a ``ValueError`` is raised when the
    outer collection or any of its members is empty, and a
    ``NotImplementedError`` is raised when ``_check_contains_null`` flags the
    value of a clause (or any element of a list value).
    """
    if predicates is None:
        return

    # Shape checks run over the whole input before any value is inspected.
    if len(predicates) == 0:
        raise ValueError("Malformed predicates")
    if not all(len(group) for group in predicates):
        raise ValueError("Malformed predicates")

    for group in predicates:
        for _, _, operand in group:
            # List operands are checked element-wise first; the operand
            # itself is checked only when no element was flagged.
            if isinstance(operand, list) and any(
                _check_contains_null(item) for item in operand
            ):
                flagged = True
            else:
                flagged = _check_contains_null(operand)
            if flagged:
                raise NotImplementedError(
                    "Null-terminated binary strings are not supported as predicate values."
                )
Ejemplo n.º 3
0
def check_predicates(predicates: PredicatesType) -> None:
    """
    Check if predicates are well-formed.

    ``predicates`` is expected to be a non-empty sequence of conjunctions,
    where every conjunction is a non-empty ``list`` of 3-tuples (clauses).
    The second and third element of each clause (operator and value) are
    validated further.

    Parameters
    ----------
    predicates
        The predicates to validate, or ``None``. ``None`` is accepted without
        any further checks.

    Raises
    ------
    ValueError
        If ``predicates`` is empty, if a conjunction is not a ``list`` or is
        empty, if a clause is not a tuple of exactly three elements, or if a
        scalar null value is combined with an operator other than ``==`` or
        ``!=``.
    NotImplementedError
        If ``_check_contains_null`` reports a hit for a clause value or, when
        the value is a list, for any of its elements.
    """
    if predicates is None:
        return

    if len(predicates) == 0:
        raise ValueError("Empty predicates")

    for conjunction_idx, conjunction in enumerate(predicates):
        if not isinstance(conjunction, list):
            raise ValueError(
                f"Invalid predicates: Conjunction {conjunction_idx} should be a "
                f"list of 3-tuples, got object of type {type(conjunction)} instead."
            )
        if len(conjunction) == 0:
            raise ValueError(
                f"Invalid predicates: Conjunction {conjunction_idx} is empty"
            )
        for clause_idx, clause in enumerate(conjunction):
            # The negation must cover BOTH conditions. Written as
            # ``not isinstance(...) and len(...) == 3`` it only rejected
            # non-tuples of length three, let wrong-length tuples through to
            # the unpacking below, and called ``len`` on unsized objects.
            if not (isinstance(clause, tuple) and len(clause) == 3):
                raise ValueError(
                    f"Invalid predicates: Clause {clause_idx} in conjunction {conjunction_idx} "
                    f"should be a 3-tuple, got object of type {type(clause)} instead"
                )
            _, op, val = clause
            # Explicit parentheses: list values are checked element-wise
            # first, then the value itself is checked as a whole.
            if (
                (
                    isinstance(val, list)
                    and any(_check_contains_null(v) for v in val)
                )
                or _check_contains_null(val)
            ):
                raise NotImplementedError(
                    "Null-terminated binary strings are not supported as predicate values."
                )
            # Only scalar values are tested here; list values are never
            # rejected by this check.
            if (
                pd.api.types.is_scalar(val)
                and pd.isnull(val)
                and op not in ["==", "!="]
            ):
                raise ValueError(
                    f"Invalid predicates: Clause {clause_idx} in conjunction {conjunction_idx} "
                    f"with null value and operator {op}. Only operators supporting null values "
                    "are '==', '!=' and 'in'."
                )
Ejemplo n.º 4
0
def test_pushdown_binaries(store, dataframe_not_nested, binary_value,
                           chunk_size):
    """
    Store a dataframe and restore it with an equality predicate on ``bytes``.

    The restored frame must contain exactly one row whose ``bytes`` value
    equals ``binary_value``. Values flagged by ``_check_contains_null`` are
    marked as expected failures before anything is stored.
    """
    if _check_contains_null(binary_value):
        pytest.xfail("Null-terminated binary strings are not supported")

    parquet = ParquetSerializer(chunk_size=chunk_size)
    stored_key = parquet.store(store, "prefix", dataframe_not_nested)

    equality_filter = [[("bytes", "==", binary_value)]]
    restored = parquet.restore_dataframe(
        store, stored_key, predicates=equality_filter
    )

    assert len(restored) == 1
    first_row = restored.iloc[0]
    assert first_row.bytes == binary_value