Example #1
0
def test_array_slice():
    """Check ``Array.slice`` and the equivalent ``[start:stop]`` notation."""
    values = pyarrow.from_pylist(range(10))
    size = len(values)

    # Offset only: everything from index 2 onwards.
    tail = values.slice(2)
    assert tail.equals(pyarrow.from_pylist(range(2, 10)))

    # Offset plus length: four elements starting at index 2.
    window = values.slice(2, 4)
    assert window.equals(pyarrow.from_pylist(range(2, 6)))

    # A zero offset gives an array equal to the original.
    assert values.slice(0).equals(values)

    # An offset equal to the length gives an empty array.
    past_end = values.slice(size)
    assert len(past_end) == 0

    # A negative offset is expected to raise.
    with pytest.raises(IndexError):
        values.slice(-1)

    # Python slice notation must agree with slice().
    assert values[2:].equals(values.slice(2))
    assert values[2:5].equals(values.slice(2, 3))
    assert values[-5:].equals(values.slice(size - 5))

    # Slices carrying a step (negative or positive) are expected to raise.
    for step in (-1, 2):
        with pytest.raises(IndexError):
            values[::step]
Example #2
0
def test_recordbatch_slice():
    """Check that ``RecordBatch.slice`` agrees with slicing each column."""
    columns = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    field_names = ['c0', 'c1']
    batch = pa.RecordBatch.from_arrays(columns, field_names)

    def sliced_columns(*args):
        # Expected batch: every column sliced with the same arguments.
        return pa.RecordBatch.from_arrays(
            [col.slice(*args) for col in columns], field_names)

    tail = batch.slice(2)
    assert tail.num_rows == 3
    assert tail.equals(sliced_columns(2))

    window = batch.slice(2, 2)
    assert window.equals(sliced_columns(2, 2))

    # Zero offset: the slice equals the original batch.
    assert batch.slice(0).equals(batch)

    # Offset equal to the length: the slice is empty.
    empty = batch.slice(len(batch))
    assert len(empty) == 0

    # A negative offset is expected to raise.
    with pytest.raises(IndexError):
        batch.slice(-1)
Example #3
0
def test_concat_tables():
    """Concatenate two named tables and compare with a directly built one."""
    column_names = ('a', 'b')
    first = [
        list(range(5)),
        [-10., -5., 0., 5., 10.]
    ]
    second = [
        list(range(5, 10)),
        [1., 2., 3., 4., 5.]
    ]

    def build(columns, table_name):
        # One array per column, wrapped in a table with the given name.
        arrays = [pa.from_pylist(values) for values in columns]
        return pa.Table.from_arrays(arrays, names=column_names,
                                    name=table_name)

    left = build(first, 'table_name')
    right = build(second, 'table_name')

    combined = pa.concat_tables([left, right], output_name='foo')
    assert combined.name == 'foo'
    assert len(combined) == 10

    # The expected table holds each column pair joined end to end.
    joined = [x + y for x, y in zip(first, second)]
    assert combined.equals(build(joined, 'foo'))
Example #4
0
def test_recordbatch_slice():
    """Slice a two-column record batch and compare against sliced columns."""
    names = ['c0', 'c1']
    arrays = [
        pa.from_pylist(range(5)),
        pa.from_pylist([-10, -5, 0, 5, 10])
    ]
    rb = pa.RecordBatch.from_arrays(arrays, names)

    # Offset only: three of the five rows remain.
    from_two = rb.slice(2)
    assert from_two.num_rows == 3
    want = pa.RecordBatch.from_arrays([a.slice(2) for a in arrays], names)
    assert from_two.equals(want)

    # Offset and length.
    two_rows = rb.slice(2, 2)
    want = pa.RecordBatch.from_arrays([a.slice(2, 2) for a in arrays], names)
    assert two_rows.equals(want)

    # Zero offset gives a batch equal to the original.
    assert rb.slice(0).equals(rb)

    # Offset equal to the length gives an empty batch.
    n_rows = len(rb)
    assert len(rb.slice(n_rows)) == 0

    # A negative offset is expected to raise.
    with pytest.raises(IndexError):
        rb.slice(-1)
Example #5
0
def test_to_pandas_zero_copy():
    """Check reference counts around ``Array.to_pandas``.

    The assertions pin exact ``sys.getrefcount`` values, so statements here
    should not be reordered and no extra temporaries should be introduced.
    """
    import gc

    arr = pyarrow.from_pylist(range(10))

    # Repeated conversions of the same array: each result must be referenced
    # only by the local name (plus getrefcount's own argument).
    for i in range(10):
        np_arr = arr.to_pandas()
        assert sys.getrefcount(np_arr) == 2
        np_arr = None  # noqa

    # After all results are dropped, only the local name refers to the array.
    assert sys.getrefcount(arr) == 2

    # Convert, then drop the source array before using the result.
    for i in range(10):
        arr = pyarrow.from_pylist(range(10))
        np_arr = arr.to_pandas()
        arr = None
        gc.collect()

        # Ensure base is still valid

        # Because of py.test's assert inspection magic, if you put getrefcount
        # on the line being examined, it will be 1 higher than you expect
        base_refcount = sys.getrefcount(np_arr.base)
        assert base_refcount == 2
        # Touch the data after the source array has been released.
        np_arr.sum()
Example #6
0
def test_recordbatch_basics():
    """A record batch built from two 5-element arrays reports its size."""
    columns = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    # Field names are passed first, the arrays second.
    rb = pa.RecordBatch.from_arrays(['c0', 'c1'], columns)

    expected_rows = 5
    assert len(rb) == expected_rows
    assert rb.num_rows == expected_rows
    assert rb.num_columns == len(columns)
Example #7
0
    def test_garbage_collection(self):
        """Check that allocated bytes return to the baseline after a collect.

        The array created below is not bound to a name, so nothing in this
        test keeps a reference to it.
        """
        import gc

        # Force the cyclic garbage collector to run
        gc.collect()

        # Baseline, taken after the initial collection.
        bytes_before = pyarrow.total_allocated_bytes()
        pyarrow.from_pylist([1, None, 3, None])
        gc.collect()
        assert pyarrow.total_allocated_bytes() == bytes_before
    def test_garbage_collection(self):
        """Check that allocated bytes return to the baseline after a collect.

        The array created below is not bound to a name, so nothing in this
        test keeps a reference to it.
        """
        import gc

        # Force the cyclic garbage collector to run
        gc.collect()

        # Baseline, taken after the initial collection.
        bytes_before = pa.total_allocated_bytes()
        pa.from_pylist([1, None, 3, None])
        gc.collect()
        assert pa.total_allocated_bytes() == bytes_before
Example #9
0
def test_recordbatch_basics():
    """Check the size attributes and ``to_pydict`` of a two-column batch."""
    arrays = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    rb = pa.RecordBatch.from_arrays(['c0', 'c1'], arrays)

    assert len(rb) == 5
    assert rb.num_rows == 5
    assert rb.num_columns == len(arrays)

    # Expected mapping, filled in column order.
    as_dict = OrderedDict()
    as_dict['c0'] = [0, 1, 2, 3, 4]
    as_dict['c1'] = [-10, -5, 0, 5, 10]
    assert rb.to_pydict() == as_dict
Example #10
0
def test_table_remove_column():
    """Removing column 0 leaves a table equal to one built from the rest."""
    arrays = [
        pa.from_pylist(range(5)),
        pa.from_pylist([-10, -5, 0, 5, 10]),
        pa.from_pylist(range(5, 10))
    ]
    full = pa.Table.from_arrays(arrays, names=('a', 'b', 'c'))
    remaining = pa.Table.from_arrays(arrays[1:], names=('b', 'c'))

    assert full.remove_column(0).equals(remaining)
Example #11
0
def test_table_remove_column():
    """Drop the first of three columns and compare with a two-column table."""
    col_a = pa.from_pylist(range(5))
    col_b = pa.from_pylist([-10, -5, 0, 5, 10])
    col_c = pa.from_pylist(range(5, 10))
    columns = [col_a, col_b, col_c]

    source = pa.Table.from_arrays(columns, names=('a', 'b', 'c'))
    trimmed = source.remove_column(0)

    # Build the reference table from the columns that should be left.
    reference = pa.Table.from_arrays(columns[1:], names=('b', 'c'))
    assert trimmed.equals(reference)
Example #12
0
def test_recordbatch_basics():
    """Build a batch from names and arrays, then check its dimensions."""
    first = pa.from_pylist(range(5))
    second = pa.from_pylist([-10, -5, 0, 5, 10])
    arrays = [first, second]

    # Field names are passed first, the arrays second.
    batch = pa.RecordBatch.from_arrays(['c0', 'c1'], arrays)

    assert len(batch) == 5
    assert batch.num_rows == 5
    assert batch.num_columns == len(arrays)
Example #13
0
    def test_basics(self):
        """Check name, size attributes and chunk iteration of a table."""
        columns = [A.from_pylist(range(5)), A.from_pylist([-10, -5, 0, 5, 10])]
        tbl = A.Table.from_arrays(('a', 'b'), columns, 'table_name')

        assert tbl.name == 'table_name'
        assert len(tbl) == 5
        assert tbl.num_rows == 5
        assert tbl.num_columns == 2
        assert tbl.shape == (5, 2)

        # Every chunk of every column must be a real object.
        for column in tbl.itercolumns():
            for piece in column.data.iterchunks():
                assert piece is not None
Example #14
0
def test_table_basics():
    """Check size attributes, ``to_pydict`` and chunk iteration of a table."""
    arrays = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    tbl = pa.Table.from_arrays(arrays, names=('a', 'b'))

    assert len(tbl) == 5
    assert tbl.num_rows == 5
    assert tbl.num_columns == 2
    assert tbl.shape == (5, 2)

    # Expected mapping, filled in column order.
    as_dict = OrderedDict()
    as_dict['a'] = [0, 1, 2, 3, 4]
    as_dict['b'] = [-10, -5, 0, 5, 10]
    assert tbl.to_pydict() == as_dict

    # Every chunk of every column must be a real object.
    for column in tbl.itercolumns():
        for piece in column.data.iterchunks():
            assert piece is not None
Example #15
0
    def test_pandas(self):
        """Convert a two-column table to pandas and spot-check the frame."""
        data = [A.from_pylist(range(5)), A.from_pylist([-10, -5, 0, 5, 10])]
        table = A.Table.from_arrays(('a', 'b'), data, 'table_name')

        # TODO: Use this part once from_pandas is implemented
        # data = {'a': range(5), 'b': [-10, -5, 0, 5, 10]}
        # df = pd.DataFrame(data)
        # A.Table.from_pandas(df)

        df = table.to_pandas()
        assert set(df.columns) == set(('a', 'b'))
        assert df.shape == (5, 2)
        # Label-based lookup via .loc; the .ix indexer is deprecated and was
        # removed in pandas 1.0.
        assert df.loc[0, 'b'] == -10
Example #16
0
    def test_basics(self):
        """Build a RowBatch from a schema and arrays and check its size."""
        columns = [A.from_pylist(range(5)), A.from_pylist([-10, -5, 0, 5, 10])]
        expected_rows = 5

        # One field per column, typed after the column it describes.
        fields = [
            A.field(label, column.type)
            for label, column in zip(('c0', 'c1'), columns)
        ]
        batch = A.RowBatch(A.schema(fields), expected_rows, columns)

        assert len(batch) == expected_rows
        assert batch.num_rows == expected_rows
        assert batch.num_columns == len(columns)
Example #17
0
def test_table_pandas():
    """Convert a two-column table to pandas and spot-check the frame."""
    arrays = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    tbl = pa.Table.from_arrays(arrays, names=('a', 'b'))

    # TODO: Use this part once from_pandas is implemented
    # data = {'a': range(5), 'b': [-10, -5, 0, 5, 10]}
    # df = pd.DataFrame(data)
    # pa.Table.from_pandas(df)

    frame = tbl.to_pandas()
    column_labels = set(frame.columns)
    assert column_labels == set(('a', 'b'))
    assert frame.shape == (5, 2)
    assert frame.loc[0, 'b'] == -10
Example #18
0
def test_recordbatch_basics():
    """Check the size attributes and ``to_pydict`` of a two-column batch."""
    arrays = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    # Arrays are passed first, the field names second.
    rb = pa.RecordBatch.from_arrays(arrays, ['c0', 'c1'])

    assert len(rb) == 5
    assert rb.num_rows == 5
    assert rb.num_columns == len(arrays)

    # Expected mapping, filled in column order.
    as_dict = OrderedDict()
    as_dict['c0'] = [0, 1, 2, 3, 4]
    as_dict['c1'] = [-10, -5, 0, 5, 10]
    assert rb.to_pydict() == as_dict
Example #19
0
def test_table_basics():
    """Check name, size attributes and chunk iteration of a named table."""
    arrays = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    tbl = pa.Table.from_arrays(('a', 'b'), arrays, 'table_name')

    assert tbl.name == 'table_name'
    assert len(tbl) == 5
    assert tbl.num_rows == 5
    assert tbl.num_columns == 2
    assert tbl.shape == (5, 2)

    # Every chunk of every column must be a real object.
    for column in tbl.itercolumns():
        for piece in column.data.iterchunks():
            assert piece is not None
 def test_fixed_size_bytes(self):
     """Four-byte binary values with one None round-trip through an array."""
     values = [b'foof', None, b'barb', b'2346']
     result = pa.from_pylist(values, type=pa.binary(4))

     assert result.type == pa.binary(4)
     assert len(result) == 4
     assert result.null_count == 1
     assert result.to_pylist() == values
 def test_fixed_size_bytes(self):
     """Build a ``binary(4)`` array from bytes and None and read it back."""
     raw = [b'foof', None, b'barb', b'2346']
     fixed = pa.from_pylist(raw, type=pa.binary(4))

     # Length counts the null entry as well.
     assert len(fixed) == 4
     assert fixed.null_count == 1
     assert fixed.type == pa.binary(4)

     round_tripped = fixed.to_pylist()
     assert round_tripped == raw
Example #22
0
def test_concat_tables():
    """Concatenate two tables and compare with a directly built one."""
    column_names = ('a', 'b')
    first = [list(range(5)), [-10., -5., 0., 5., 10.]]
    second = [list(range(5, 10)), [1., 2., 3., 4., 5.]]

    def build(columns):
        # One array per column, wrapped in a table with the shared names.
        arrays = [pa.from_pylist(values) for values in columns]
        return pa.Table.from_arrays(arrays, names=column_names)

    left = build(first)
    right = build(second)

    combined = pa.concat_tables([left, right])
    assert len(combined) == 10

    # The expected table holds each column pair joined end to end.
    joined = [x + y for x, y in zip(first, second)]
    assert combined.equals(build(joined))
Example #23
0
def test_table_pandas():
    """Convert a named two-column table to pandas and spot-check the frame."""
    arrays = [pa.from_pylist(range(5)), pa.from_pylist([-10, -5, 0, 5, 10])]
    tbl = pa.Table.from_arrays(('a', 'b'), arrays, 'table_name')

    # TODO: Use this part once from_pandas is implemented
    # data = {'a': range(5), 'b': [-10, -5, 0, 5, 10]}
    # df = pd.DataFrame(data)
    # pa.Table.from_pandas(df)

    frame = tbl.to_pandas()
    column_labels = set(frame.columns)
    assert column_labels == set(('a', 'b'))
    assert frame.shape == (5, 2)
    assert frame.loc[0, 'b'] == -10
Example #24
0
    def test_from_array(self):
        """A column named by string equals one named through a field."""
        values = pa.from_pylist([0, 1, 2, 3, 4])
        named_field = pa.field('foo', values.type)

        by_name = pa.Column.from_array('foo', values)
        by_field = pa.Column.from_array(named_field, values)

        assert by_name.equals(by_field)
 def test_unicode(self):
     """Text values with one None give a string array that round-trips."""
     words = [u'foo', u'bar', None, u'mañana']
     result = pa.from_pylist(words)

     assert result.type == pa.string()
     assert len(result) == 4
     assert result.null_count == 1
     assert result.to_pylist() == words
 def test_integer(self):
     """Integers mixed with None give an int64 array that round-trips."""
     values = [1, None, 3, None]
     result = pa.from_pylist(values)

     assert result.type == pa.int64()
     assert len(result) == 4
     assert result.null_count == 2
     assert result.to_pylist() == values
Example #27
0
 def test_integer(self):
     """Build an array from ints and None and check type, nulls and values."""
     source = [1, None, 3, None]
     built = pyarrow.from_pylist(source)

     # Two of the four entries are None.
     assert len(built) == 4
     assert built.null_count == 2
     assert built.type == pyarrow.int64()

     round_tripped = built.to_pylist()
     assert round_tripped == source
 def test_double(self):
     """Floats, an int and None values give a float64 array."""
     values = [1.5, 1, None, 2.5, None, None]
     result = pa.from_pylist(values)

     assert result.type == pa.float64()
     assert len(result) == 6
     assert result.null_count == 3
     assert result.to_pylist() == values
Example #29
0
 def test_unicode(self):
     """Build an array from text and None and check type, nulls and values."""
     source = [u'foo', u'bar', None, u'mañana']
     built = pyarrow.from_pylist(source)

     # One of the four entries is None.
     assert len(built) == 4
     assert built.null_count == 1
     assert built.type == pyarrow.string()

     round_tripped = built.to_pylist()
     assert round_tripped == source
Example #30
0
 def test_list_of_int(self):
     """Nested int lists (with an empty list and a None) give list<int64>."""
     nested = [[1, 2, 3], [], None, [1, 2]]
     result = pyarrow.from_pylist(nested)

     assert result.type == pyarrow.list_(pyarrow.int64())
     assert len(result) == 4
     assert result.null_count == 1
     assert result.to_pylist() == nested
 def test_list_of_int(self):
     """Build an array from lists of ints and check type, nulls and values."""
     source = [[1, 2, 3], [], None, [1, 2]]
     built = pa.from_pylist(source)

     # Only the None entry counts as null; the empty list does not.
     assert len(built) == 4
     assert built.null_count == 1
     assert built.type == pa.list_(pa.int64())

     round_tripped = built.to_pylist()
     assert round_tripped == source
Example #32
0
 def test_unicode(self):
     """Text produced by the ``u`` helper gives a string array."""
     source = [u("foo"), u("bar"), None, u("arrow")]
     built = pyarrow.from_pylist(source)

     assert built.type == pyarrow.string()
     assert len(built) == 4
     assert built.null_count == 1

     # Compare against a freshly built list, not the input object.
     wanted = [u("foo"), u("bar"), None, u("arrow")]
     assert built.to_pylist() == wanted
Example #33
0
 def test_boolean(self):
     """Booleans mixed with None give a bool array that round-trips."""
     flags = [True, None, False, None]
     result = pyarrow.from_pylist(flags)

     assert result.type == pyarrow.bool_()
     assert len(result) == 4
     assert result.null_count == 2
     assert result.to_pylist() == flags
Example #34
0
 def test_unicode(self):
     """Build a string array from ``u``-wrapped text and read it back."""
     words = [u('foo'), u('bar'), None, u('arrow')]
     result = pyarrow.from_pylist(words)

     # One of the four entries is None.
     assert len(result) == 4
     assert result.null_count == 1
     assert result.type == pyarrow.string()

     # Compare against a freshly built list, not the input object.
     wanted = [u('foo'), u('bar'), None, u('arrow')]
     assert result.to_pylist() == wanted
 def test_boolean(self):
     """Build an array from bools and None and check type, nulls and values."""
     source = [True, None, False, None]
     built = pa.from_pylist(source)

     # Two of the four entries are None.
     assert len(built) == 4
     assert built.null_count == 2
     assert built.type == pa.bool_()

     round_tripped = built.to_pylist()
     assert round_tripped == source
Example #36
0
 def test_double(self):
     """Build an array from floats, an int and None; expect a double type."""
     source = [1.5, 1, None, 2.5, None, None]
     built = pyarrow.from_pylist(source)

     # Three of the six entries are None.
     assert len(built) == 6
     assert built.null_count == 3
     assert built.type == pyarrow.double()

     round_tripped = built.to_pylist()
     assert round_tripped == source
Example #37
0
 def test_pandas(self):
     """Convert the single column of a table to a pandas series."""
     arrays = [pa.from_pylist([-10, -5, 0, 5, 10])]
     tbl = pa.Table.from_arrays(arrays, names=['a'])

     as_series = tbl.column(0).to_pandas()

     assert as_series.name == 'a'
     assert as_series.shape == (5, )
     assert as_series.iloc[0] == -10
Example #38
0
 def test_decimal_different_precisions(self):
     """Decimals with different digit counts round-trip at one precision."""
     data = [
         decimal.Decimal('1234234983.183'),
         decimal.Decimal('80943244.234')
     ]
     # Named decimal_type so the builtin ``type`` is not shadowed.
     decimal_type = pa.decimal(precision=13, scale=3)
     arr = pa.from_pylist(data, type=decimal_type)
     assert arr.to_pylist() == data
Example #39
0
 def test_bytes(self):
     """Bytes mixed with text give a binary array; text comes back encoded."""
     encoded = b"ma\xc3\xb1ana"
     # The second entry is text; it is expected back as its UTF-8 bytes.
     source = [b"foo", encoded.decode("utf-8"), None]
     built = pyarrow.from_pylist(source)

     assert built.type == pyarrow.binary()
     assert len(built) == 3
     assert built.null_count == 1
     assert built.to_pylist() == [b"foo", encoded, None]
Example #40
0
 def test_decimal_large_integer(self):
     """Decimals with large integer parts round-trip at precision 23."""
     data = [
         decimal.Decimal('-394029506937548693.42983'),
         decimal.Decimal('32358695912932.01033')
     ]
     # Named decimal_type so the builtin ``type`` is not shadowed.
     decimal_type = pa.decimal(precision=23, scale=5)
     arr = pa.from_pylist(data, type=decimal_type)
     assert arr.to_pylist() == data
Example #41
0
def test_table_basics():
    """Check size attributes, ``to_pydict`` and chunk iteration of a table."""
    first = pa.from_pylist(range(5))
    second = pa.from_pylist([-10, -5, 0, 5, 10])
    tbl = pa.Table.from_arrays([first, second], names=('a', 'b'))

    assert len(tbl) == 5
    assert tbl.num_rows == 5
    assert tbl.num_columns == 2
    assert tbl.shape == (5, 2)

    # Expected mapping, filled in column order.
    as_dict = OrderedDict()
    as_dict['a'] = [0, 1, 2, 3, 4]
    as_dict['b'] = [-10, -5, 0, 5, 10]
    assert tbl.to_pydict() == as_dict

    # Every chunk of every column must be a real object.
    for column in tbl.itercolumns():
        for piece in column.data.iterchunks():
            assert piece is not None
Example #42
0
 def test_basics(self):
     """Check name, length, shape and values of a table's only column."""
     values = [-10, -5, 0, 5, 10]
     tbl = pa.Table.from_arrays([pa.from_pylist(values)], names=['a'])
     col = tbl.column(0)

     assert col.name == 'a'
     # length() and len() are both expected to report five elements.
     assert col.length() == 5
     assert len(col) == 5
     assert col.shape == (5, )
     assert col.to_pylist() == [-10, -5, 0, 5, 10]
Example #43
0
    def test_bool(self):
        """Element access on a boolean array: value wrapper and NA."""
        flags = A.from_pylist([True, None, False, None])

        first = flags[0]
        assert isinstance(first, A.BooleanValue)
        assert repr(first) == "True"
        assert first.as_py() is True

        # A None input is expected back as the NA singleton.
        second = flags[1]
        assert second is A.NA
Example #44
0
    def test_bool(self):
        """Element access on a boolean array: value wrapper and NA."""
        arr = A.from_pylist([True, None, False, None])

        v = arr[0]
        assert isinstance(v, A.BooleanValue)
        assert repr(v) == "True"
        # Identity check: ``== True`` would also accept truthy non-bools
        # such as 1, so it would not pin the returned type.
        assert v.as_py() is True

        # A None input is expected back as the NA singleton.
        assert arr[1] is A.NA
Example #45
0
    def test_int64(self):
        """Element access on an integer array: value wrapper and NA."""
        numbers = A.from_pylist([1, 2, None])

        first = numbers[0]
        assert isinstance(first, A.Int64Value)
        assert repr(first) == "1"
        assert first.as_py() == 1

        # A None input is expected back as the NA singleton.
        last = numbers[2]
        assert last is A.NA
Example #46
0
    def test_int64(self):
        """Index into an int array and check the wrapper type and NA."""
        ints = A.from_pylist([1, 2, None])
        head = ints[0]

        # The first element is wrapped in an Int64Value.
        assert isinstance(head, A.Int64Value)
        assert head.as_py() == 1
        assert repr(head) == "1"

        # The trailing None is the NA singleton.
        assert ints[2] is A.NA
Example #47
0
    def test_string_format(self):
        arr = pyarrow.from_pylist(['', None, 'foo'])
        result = fmt.array_format(arr)
        expected = """\
[
  '',
  NA,
  'foo'
]"""
        assert result == expected
Example #48
0
 def test_bytes(self):
     """Bytes mixed with text give a binary array; text comes back encoded."""
     encoded = b'ma\xc3\xb1ana'
     as_text = encoded.decode('utf-8')
     # The text entry is expected back as its UTF-8 bytes.
     source = [b'foo', as_text, None]
     built = pyarrow.from_pylist(source)

     assert built.type == pyarrow.binary()
     assert len(built) == 3
     assert built.null_count == 1
     assert built.to_pylist() == [b'foo', encoded, None]
Example #49
0
 def test_date(self):
     """Dates mixed with None give a date array; elements convert back."""
     dates = [
         datetime.date(2000, 1, 1),
         None,
         datetime.date(1970, 1, 1),
         datetime.date(2040, 2, 26),
     ]
     built = pyarrow.from_pylist(dates)

     assert len(built) == 4
     assert built.type == pyarrow.date()
     assert built.null_count == 1

     # Each element converts back to its input; the None stays None.
     for position, wanted in enumerate(dates):
         if wanted is None:
             assert built[position].as_py() is None
         else:
             assert built[position].as_py() == wanted
Example #50
0
 def test_pandas(self):
     """Convert the single column of a named table to a pandas series."""
     arrays = [pa.from_pylist([-10, -5, 0, 5, 10])]
     tbl = pa.Table.from_arrays(arrays, names=['a'], name='table_name')

     as_series = tbl.column(0).to_pandas()

     assert as_series.name == 'a'
     assert as_series.shape == (5,)
     assert as_series.iloc[0] == -10
Example #51
0
def test_string_format():
    """Format a string array holding an empty string and a None."""
    strings = pyarrow.from_pylist(['', None, 'foo'])
    # One element per line, with the None shown as NA.
    wanted = """\
[
  '',
  NA,
  'foo'
]"""
    rendered = fmt.array_format(strings)
    assert rendered == wanted
Example #52
0
 def test_basics(self):
     """Check name, length, shape and values of a named table's column."""
     values = [-10, -5, 0, 5, 10]
     tbl = pa.Table.from_arrays([pa.from_pylist(values)], names=['a'],
                                name='table_name')
     col = tbl.column(0)

     assert col.name == 'a'
     # length() and len() are both expected to report five elements.
     assert col.length() == 5
     assert len(col) == 5
     assert col.shape == (5,)
     assert col.to_pylist() == [-10, -5, 0, 5, 10]
Example #53
0
    def test_list_format(self):
        arr = pyarrow.from_pylist([[1], None, [2, 3]])
        result = fmt.array_format(arr)
        expected = """\
[
  [1],
  NA,
  [2,
   3]
]"""
        assert result == expected
Example #54
0
    def test_list_format(self):
        arr = pyarrow.from_pylist([[1], None, [2, 3]])
        result = fmt.array_format(arr)
        expected = """\
[
  [1],
  NA,
  [2,
   3]
]"""
        assert result == expected
 def test_bytes(self):
     """Bytes mixed with text give a binary array; text comes back encoded."""
     encoded = b'ma\xc3\xb1ana'
     as_text = encoded.decode('utf-8')
     # The text entry is expected back as its UTF-8 bytes.
     source = [b'foo', as_text, None]
     built = pa.from_pylist(source)

     assert built.type == pa.binary()
     assert len(built) == 3
     assert built.null_count == 1
     assert built.to_pylist() == [b'foo', encoded, None]
Example #56
0
    def test_bytes(self):
        """Element access on a binary array built from bytes and text."""
        blobs = A.from_pylist([b'foo', None, u('bar')])

        first = blobs[0]
        assert isinstance(first, A.BinaryValue)
        assert first.as_py() == b'foo'

        # A None input is expected back as the NA singleton.
        assert blobs[1] is A.NA

        # The text input is expected back as a bytes object.
        last = blobs[2].as_py()
        assert isinstance(last, bytes)
        assert last == b'bar'
Example #57
0
    def test_long_array_format(self):
        arr = pyarrow.from_pylist(range(100))
        result = fmt.array_format(arr, window=2)
        expected = """\
[
  0,
  1,
  ...
  98,
  99
]"""
        assert result == expected
Example #58
0
    def test_double(self):
        """Element access on a float array built from floats and an int."""
        numbers = A.from_pylist([1.5, None, 3])

        first = numbers[0]
        assert isinstance(first, A.DoubleValue)
        assert repr(first) == "1.5"
        assert first.as_py() == 1.5

        # A None input is expected back as the NA singleton.
        assert numbers[1] is A.NA

        # The integer input is expected to compare equal to 3.0.
        last = numbers[2]
        assert last.as_py() == 3.0