Example #1
0
def math_op_base():
    """Time ``key + 1`` on a 10M-row integer column in four libraries.

    Builds a pandas DataFrame with 100 float columns plus an integer ``key``
    column, loads it into a PyCylon ``Table``, and then measures how long a
    scalar addition on the key column takes with NumPy, PyCylon, pandas and
    PyArrow. The elapsed seconds for each library are printed to stdout.
    """
    ctx: CylonContext = CylonContext(config=None, distributed=False)
    num_rows = 10_000_000
    data = np.random.randn(num_rows)

    df = pd.DataFrame({'data{}'.format(i): data for i in range(100)})

    np_key = np.random.randint(0, 100, size=num_rows)
    df['key'] = np_key

    ct = Table.from_pandas(ctx, df)

    # Pull the Arrow key column out before the clock starts so that the
    # PyArrow figure covers only the addition, not the Cylon -> Arrow
    # conversion.
    ar_key = ct['key'].to_arrow().combine_chunks().columns[0].chunks[0]

    # perf_counter is the clock intended for measuring short intervals.
    t1 = time.perf_counter()
    np_key + 1
    t2 = time.perf_counter()
    ct['key'] + 1
    t3 = time.perf_counter()
    df['key'] + 1
    t4 = time.perf_counter()
    pc.add(ar_key, 1)
    t5 = time.perf_counter()

    print(f"Numpy Time: {t2 - t1} s")
    print(f"PyCylon Time: {t3 - t2} s")
    print(f"Pandas Time: {t4 - t3} s")
    print(f"PyArrow Time: {t5 - t4} s")
Example #2
0
def test_input_type_conversion():
    """Check that compute functions accept plain Python values as inputs."""
    # Python lists on both sides; the None entry comes back as None
    summed = pc.add([1, 2], [4, None])
    assert summed.to_pylist() == [5, None]

    # A bare Python number on the right-hand side
    shifted = pc.add([1, 2], 4)
    assert shifted.to_pylist() == [5, 6]

    # A bare Python string on the right-hand side
    matches = pc.equal(["foo", "bar", None], "foo")
    assert matches.to_pylist() == [True, False, None]
Example #3
0
def clean_cat(arr, categories=None):
    """Encode *arr* as integer category codes, with 0 meaning "Unknown".

    The input is cast to string and dictionary-encoded first.

    Args:
        arr: Arrow array to encode.
        categories: Optional ordered list of known labels. When given, each
            value is mapped to the 1-based position of its first occurrence
            in this list; values not in the list, and nulls, map to 0. When
            omitted or empty, the labels found in *arr* are used instead.

    Returns:
        A ``(codes, labels)`` tuple where ``labels[0]`` is ``'Unknown'`` and
        ``labels[k]`` is the label for code ``k``.
    """
    arr = arr.cast(pa.string()).dictionary_encode()
    dic = arr.dictionary.to_pylist()

    if not categories:
        # Shift every dictionary index up by one so that 0 is free for nulls.
        # NOTE(review): `c` is not defined in this block; presumably an alias
        # for the Arrow compute module imported elsewhere -- confirm.
        shifted = c.add(arr.indices, pa.array([1], type=pa.int32())[0])
        return (shifted.fill_null(0), ['Unknown'] + dic)

    # 1-based position of each label; setdefault keeps the first occurrence,
    # matching what list.index would return for duplicated labels.
    position = {}
    for pos, label in enumerate(categories, start=1):
        position.setdefault(label, pos)

    # codes[i] is the output code for dictionary entry i. The extra trailing
    # slot is 0 and is what index -1 (used for nulls below) resolves to.
    codes = np.array([position.get(v, 0) for v in dic] + [0], dtype=np.int_)

    # Plain array indexing instead of np.vectorize: vectorize raises on
    # size-0 input unless otypes is set, so an empty *arr* used to crash here.
    indices = arr.indices.fill_null(-1).to_numpy()
    return (pa.array(codes[indices]), ['Unknown'] + list(categories))
Example #4
0
def test_arithmetic_add():
    """Check element-wise addition of two equal-length integer arrays."""
    lhs = pa.array([1, 2, 3, 4, 5])
    rhs = pa.array([0, -1, 1, 2, 3])
    want = pa.array([1, 1, 4, 6, 8])

    got = pc.add(lhs, rhs)

    assert got.equals(want)
Example #5
0
def my_add(arr1, arr2, **kwargs):
    """Return the result of ``pc.add(arr1, arr2)``.

    Any extra keyword arguments are accepted but not used.
    """
    total = pc.add(arr1, arr2)
    return total