def test_longer_table():
    """Check a 500000-row, four-column tube converts to a pyarrow table.

    Asserts the resulting column types, the table shape, and the content
    of the final row.
    """
    row_count = 500000
    column_names = ('index', 'src', 'strval', 'is_big')

    def build_columns(x):
        # One output row per input value: a fresh counter, the value
        # itself, its string form (via object) and a "> 1000" flag.
        return (tubes.Count(), x, x.to(object).to(str), x.gt(1000))

    source = tubes.Count(5).multi(build_columns).first(row_count)
    result = source.to_pyarrow(column_names)

    assert isinstance(result, pa.Table)

    type_names = [str(column.type) for column in result.columns]
    assert type_names == ['int64', 'int64', 'string', 'bool']

    assert result.shape == (row_count, 4)

    last_row = dict(result.to_pandas().iloc[-1])
    assert last_row == {
        'index': 499999,
        'src': 500004,
        'strval': '500004',
        'is_big': True,
    }
def test_doubles():
    """Check that a float tube becomes a single 'double' pyarrow column."""
    source = tubes.Count(2).to(float).first(4)
    result = source.to_pyarrow(('a', ))

    assert isinstance(result, pa.Table)

    first_column = result.columns[0]
    assert str(first_column.type) == 'double'

    # Row positions 0..3 map to the values 2..5.
    expected = {'a': dict(enumerate(range(2, 6)))}
    assert result.to_pandas().to_dict() == expected
# NOTE(review): a second `test_enumerate` is defined further down in the
# visible source. If both really live in the same module, the later
# definition shadows this one and this test is never collected -- confirm
# and rename one of them if so.
def test_enumerate():
    """Check an enumerated tube converts to two int64 pyarrow columns."""
    source = tubes.Count(2).enumerate().first(4)
    result = source.to_pyarrow(('a', 'b'))

    assert isinstance(result, pa.Table)

    for position in (0, 1):
        assert str(result.columns[position].type) == 'int64'

    expected = {
        'a': dict(enumerate(range(4))),      # the enumeration index
        'b': dict(enumerate(range(2, 6))),   # the counted values
    }
    assert result.to_pandas().to_dict() == expected
def test_skip_if_lambda():
    """skip_if with a lambda condition drops the values below 3."""
    first_six = tubes.Count().first(6)
    remaining = first_six.skip_if(lambda x: x.lt(3))

    assert list(remaining) == list(range(3, 6))
def test_skip_if():
    """skip_if with a boolean tube drops the rows whose flag is True."""
    # Only four flags are supplied for five counted values; the expected
    # output below contains nothing for the fifth value.
    skip_flags = tubes.Each([False, True, False, True]).to(bool)
    remaining = tubes.Count().first(5).skip_if(skip_flags)

    assert list(remaining) == [0, 2]
def test_skip_unless_lambda():
    """skip_unless with a lambda condition keeps only the values below 3."""

    def below_three(x):
        # The first(5) bound is applied to the condition tube here, not
        # to the outer Count() tube.
        # NOTE(review): presumably the output ends once the condition
        # tube is exhausted -- confirm against the tubes documentation.
        return x.lt(3).first(5)

    kept = tubes.Count().skip_unless(below_three)

    assert list(kept) == list(range(3))
def test_skip_unless():
    """skip_unless with a boolean tube keeps the rows whose flag is True."""
    # The counter itself is unbounded; the four-entry flag tube is the
    # only finite input in this pipeline.
    keep_flags = tubes.Each([False, True, False, True]).to(bool)
    kept = tubes.Count().skip_unless(keep_flags)

    assert list(kept) == [1, 3]
def test_fill_ndarray_same_type_fields():
    """ndarray(fields=True) on an enumerated tube gives a 1-D array of pairs."""
    pairs = tubes.Count(5).first(4).enumerate()
    array = pairs.ndarray(fields=True)

    assert array.shape == (4, )

    # Each element converts to an (index, value) tuple: 0..3 paired with 5..8.
    rows = list(map(tuple, array))
    assert rows == list(enumerate(range(5, 9)))
def test_fill_ndarray_integers():
    """ndarray() returns all 100 integers despite a row estimate of 2."""
    total = 100
    source = tubes.Count().first(total).to(int)

    # estimated_rows is deliberately far below the real row count.
    # NOTE(review): presumably this exercises growth of the output
    # buffer -- confirm against the ndarray implementation.
    array = source.ndarray(estimated_rows=2)

    assert array.shape == (total, )
    assert list(array) == list(range(total))
def test_int():
    """Check a 20,000-row int tube round-trips through a pyarrow table."""
    row_count = 20_000
    source = tubes.Count().to(int).first(row_count)
    result = source.to_pyarrow(('a', ))

    assert isinstance(result, pa.Table)
    assert len(result) == row_count

    values = list(result.to_pandas()['a'])
    assert values == list(range(row_count))
# NOTE(review): an earlier `test_enumerate` (the pyarrow variant) appears
# above in the visible source. If both really live in the same module,
# this definition shadows the earlier one -- confirm and rename one of
# them if so.
def test_enumerate():
    """Iterating an enumerated tube yields (index, value) pairs."""
    numbered = tubes.Count(2).enumerate().first(4)

    # Indexes 0..3 paired with the counted values 2..5.
    expected = list(zip(range(4), range(2, 6)))
    assert list(numbered) == expected