Ejemplo n.º 1
0
def test_compare_scalar(typ):
    """Check element-wise comparisons of an integer container with a scalar.

    ``typ == "array"`` builds the operands with ``pa.array``; any other value
    wraps them in a single-chunk ``pa.chunked_array``.  The expected results
    keep a null wherever the input holds a null.
    """
    chunked = typ != "array"

    def build(values):
        # Construct the container flavour selected by ``typ``.
        if chunked:
            return pa.chunked_array([values])
        return pa.array(values)

    data = build([1, 2, 3, None])
    two = pa.scalar(2)

    assert pc.equal(data, two).equals(build([False, True, False, None]))

    if not chunked:
        # Comparing with a null scalar is only exercised for plain arrays;
        # every slot of the result is expected to be null.
        null_scalar = pa.scalar(None, type="int64")
        compared = pc.equal(data, null_scalar)
        assert compared.to_pylist() == [None, None, None, None]

    remaining = (
        (pc.not_equal, [True, False, True, None]),
        (pc.less, [True, False, False, None]),
        (pc.less_equal, [True, True, False, None]),
        (pc.greater, [False, False, True, None]),
        (pc.greater_equal, [False, True, True, None]),
    )
    for compare, expected in remaining:
        assert compare(data, two).equals(build(expected))
Ejemplo n.º 2
0
def test_compare_array(typ):
    """Check element-wise comparisons between two integer containers.

    ``typ == "array"`` builds the operands with ``pa.array``; any other value
    wraps them in a single-chunk ``pa.chunked_array``.  Positions where either
    operand is null are expected to be null in the result.
    """
    chunked = typ != "array"

    def build(values):
        # Construct the container flavour selected by ``typ``.
        if chunked:
            return pa.chunked_array([values])
        return pa.array(values)

    left = build([1, 2, 3, 4, None])
    right = build([1, 1, 4, None, 4])

    cases = (
        (pc.equal, [True, False, False, None, None]),
        (pc.not_equal, [False, True, True, None, None]),
        (pc.less, [False, False, True, None, None]),
        (pc.less_equal, [True, False, True, None, None]),
        (pc.greater, [False, True, False, None, None]),
        (pc.greater_equal, [True, True, False, None, None]),
    )
    for compare, expected in cases:
        assert compare(left, right).equals(build(expected))
Ejemplo n.º 3
0
def test_compare_scalar(typ):
    """Compare an integer array (or chunked array) against the scalar ``2``.

    ``typ == "array"`` exercises ``pa.array``; any other value exercises a
    single-chunk ``pa.chunked_array``.  The null element of the input is
    expected to stay null in every comparison result.
    """
    if typ == "array":

        def con(values):
            return pa.array(values)
    else:

        def con(values):
            return pa.chunked_array([values])

    arr = con([1, 2, 3, None])
    # Build the scalar directly instead of going through the former
    # ``pa.array([2]).sum()`` workaround.
    scalar = pa.scalar(2)

    expectations = (
        (pc.equal, [False, True, False, None]),
        (pc.not_equal, [True, False, True, None]),
        (pc.less, [True, False, False, None]),
        (pc.less_equal, [True, True, False, None]),
        (pc.greater, [False, False, True, None]),
        (pc.greater_equal, [False, True, True, None]),
    )
    for compare, expected in expectations:
        result = compare(arr, scalar)
        assert result.equals(con(expected))
Ejemplo n.º 4
0
def test_compare_string_scalar(typ):
    """Check element-wise comparisons of a string container with a scalar.

    ``typ == "array"`` builds the operands with ``pa.array``; any other value
    wraps them in a single-chunk ``pa.chunked_array``.  The expected results
    keep a null wherever the input holds a null.
    """
    chunked = typ != "array"

    def build(values):
        # Construct the container flavour selected by ``typ``.
        if chunked:
            return pa.chunked_array([values])
        return pa.array(values)

    strings = build(['a', 'b', 'c', None])
    needle = pa.scalar('b')

    assert pc.equal(strings, needle).equals(build([False, True, False, None]))

    if not chunked:
        # Comparing with a null scalar is only exercised for plain arrays;
        # every slot of the result is expected to be null.
        null_scalar = pa.scalar(None, type="string")
        null_mask = pc.is_null(pc.equal(strings, null_scalar))
        assert null_mask.equals(build([True, True, True, True]))

    remaining = (
        (pc.not_equal, [True, False, True, None]),
        (pc.less, [True, False, False, None]),
        (pc.less_equal, [True, True, False, None]),
        (pc.greater, [False, False, True, None]),
        (pc.greater_equal, [False, True, True, None]),
    )
    for compare, expected in remaining:
        assert compare(strings, needle).equals(build(expected))
Ejemplo n.º 5
0
def binary_col(op, l, r):
    """Evaluate the binary operator ``op`` on the operands ``l`` and ``r``.

    Each supported operator token is forwarded to one function on
    ``compute`` and that call's result is returned unchanged.

    Supported tokens:
        ``+``, ``-``, ``*``, ``/``  -> the ``*_checked`` arithmetic functions
        ``=``, ``==``, ``<>``, ``!=``, ``<``, ``>``, ``<=``, ``>=``
                                    -> the comparison functions
        ``and``, ``or``             -> ``and_`` / ``or_``
        ``in``                      -> ``is_in``

    Raises:
        Exception: if ``op`` matches none of the supported tokens.
    """
    # (token, attribute name on ``compute``) pairs, in the original lookup
    # order.  The attribute is resolved only for the matching token, so an
    # unsupported operator never touches ``compute`` at all.
    dispatch = (
        ("+", "add_checked"),
        ("*", "multiply_checked"),
        ('-', "subtract_checked"),
        ("=", "equal"),
        ("<>", "not_equal"),
        ("!=", "not_equal"),
        ("or", "or_"),
        ("<", "less"),
        (">", "greater"),
        ("/", "divide_checked"),
        ("and", "and_"),
        ("in", "is_in"),
        ("==", "equal"),
        ("<=", "less_equal"),
        (">=", "greater_equal"),
    )
    for token, func_name in dispatch:
        if op == token:
            return getattr(compute, func_name)(l, r)
    raise Exception("binary op not implemented")
Ejemplo n.º 6
0
# Exploratory script: load the yellow-taxi CSV with custom boolean parsing,
# inspect column sizes, convert to pandas, and compute the mean tip ratio.

# NOTE(review): `table` is not assigned anywhere above this line in the
# visible snippet -- presumably it comes from an earlier part of the
# session; confirm before running this top to bottom.
table_df = table.to_pandas()

# Read "VendorID" as a boolean column; the strings "Y"/"1" are passed as the
# true values and "N"/"2" as the false values.
convert_options = csv.ConvertOptions(
    column_types={
        "VendorID": pa.bool_(),
        # "trip_distance": pa.float16()
    },
    true_values=["Y", "1"],
    false_values=["N", "2"])
table = csv.read_csv("../sec1-intro/yellow_tripdata_2020-01.csv.gz",
                     convert_options=convert_options)
# Print the distinct store_and_fwd_flag values, that column's size in whole
# MiB, then the sizes of VendorID and store_and_fwd_flag in whole KiB.
print(table["store_and_fwd_flag"].unique(),
      table["store_and_fwd_flag"].nbytes // (1024**2),
      table["VendorID"].nbytes // 1024,
      table["store_and_fwd_flag"].nbytes // 1024)

# Round-trip a boolean array through string and back to boolean.
# `x` is not read again in the visible part of the script.
x = pa.array([False, True]).cast(pa.string()).cast(pa.bool_())

table_df = table.to_pandas()
print(table_df.store_and_fwd_flag)
# NOTE(review): pyarrow documents `self_destruct=True` as experimental and
# warns against using the Table after the call, yet `table` is used again
# right below -- confirm this ordering is intended.
mission_impossible = table.to_pandas(self_destruct=True)

import pyarrow.compute as pc
# The next two results are discarded; presumably an interactive check that an
# int literal and a float literal behave the same -- TODO confirm.
pc.equal(table["total_amount"], 0)
pc.equal(table["total_amount"], 0.0)
# Filter on the mask "total_amount != 0.0".
t0 = table.filter(pc.not_equal(table["total_amount"], 0.0))

# Mean of tip_amount / total_amount over the filtered rows (result discarded;
# the trailing note records the author's timing).
pc.mean(pc.divide(t0["tip_amount"], t0["total_amount"]))  # 18ms
# The fair comparison is (also do on other computer)