def test_hash_on_from_array(self):
    """Check how ``hash_func`` affects the ``hash`` of ``from_array`` sources.

    With ``hash_func=None`` two sources over different data are expected to
    report equal hashes; with a length-based ``hash_func`` they must differ.
    """

    def build_pair(make_hash_func):
        # Fresh lists and a fresh hash function for every source, as in the
        # inline construction this helper replaces.
        five = from_array(list(range(1, 6)), hash_func=make_hash_func())
        seven = from_array(list(range(1, 8)), hash_func=make_hash_func())
        return five, seven

    five, seven = build_pair(lambda: None)
    self.assertEqual(five.hash, seven.hash)

    five, seven = build_pair(lambda: (lambda a: len(a)))
    self.assertNotEqual(five.hash, seven.hash)
def test_bucket_iterator(self):
    """Check batch type, batch sizes and item coverage of ``BucketIterator``.

    The dataset holds 900 lists whose lengths run from 100 to 999.
    """
    dataset = from_array([list(range(n)) for n in range(100, 1000)])

    # Batches of 100: each batch is a plain list and the batch lengths add
    # up to the 900 items of the dataset.
    bucket_iter = BucketIterator(
        dataset,
        batch_size=100,
        num_workers=0,
        batch_transforms=None,
        sort_key=lambda item: item,
    )
    batches = list(bucket_iter)
    self.assertEqual(list, type(batches[0]))
    self.assertEqual(900, sum(len(batch) for batch in batches))

    # Batches of 200: four full batches plus one of 100.
    bucket_iter = BucketIterator(
        dataset,
        batch_size=200,
        num_workers=0,
        batch_transforms=None,
        sort_key=lambda item: item,
    )
    batches = list(bucket_iter)
    self.assertEqual(5, len(batches))
    batch_sizes = [len(batch) for batch in batches]
    self.assertEqual([100, 200, 200, 200, 200], sorted(batch_sizes))

    # Every item length from 100 to 999 shows up exactly once over all batches.
    item_lengths = []
    for batch in batches:
        for item in batch:
            item_lengths.append(len(item))
    item_lengths.sort()
    self.assertEqual(list(range(100, 1000)), item_lengths)
def test_iterator(self):
    """Check batching of ``Iterator`` with and without shuffling."""
    dataset = from_array(list(range(1000)))

    # Unshuffled, batches of 100: plain-list batches in source order.
    plain_iter = Iterator(
        dataset,
        batch_size=100,
        shuffle=False,
        num_workers=0,
        batch_transforms=None,
    )
    batches = list(plain_iter)
    self.assertEqual(list, type(batches[0]))
    self.assertEqual(1000, sum(len(batch) for batch in batches))
    self.assertEqual(list(range(100)), batches[0])

    # Shuffled, batches of 300: three full batches and a remainder of 100.
    shuffled_iter = Iterator(
        dataset,
        batch_size=300,
        shuffle=True,
        num_workers=0,
        batch_transforms=None,
    )
    batches = list(shuffled_iter)
    self.assertEqual(4, len(batches))
    self.assertEqual(300, len(batches[2]))
    self.assertEqual(100, len(batches[3]))

    # Shuffling must neither drop nor duplicate items.
    seen = []
    for batch in batches:
        seen.extend(value for value in batch)
    seen.sort()
    self.assertEqual(list(range(1000)), seen)
def test_random_access(self):
    """Check integer indexing and slicing of sources.

    Covers ``from_items`` and ``from_array`` sources, including stepped
    slices, negative indices and out-of-range bounds.
    """
    items = from_items(1, 2, 3, 4, 5)
    self.assertEqual(1, items[0])
    self.assertEqual(2, items[1])
    self.assertEqual([3, 4, 5], list(items[2:]))
    self.assertTrue(items[2:].random_accessible)

    # A slice is indexable itself; it is re-sliced for every lookup.
    for offset, expected in enumerate((3, 4, 5)):
        self.assertEqual(expected, items[2:][offset])
    with self.assertRaises(IndexError):
        items[2:][3]

    self.assertEqual([1, 2, 3], list(items[:3]))
    self.assertEqual([2, 3], list(items[1:3]))

    numbers = from_array(list(range(20)))
    self.assertEqual(0, numbers[0])
    self.assertEqual(1, numbers[1])

    # The same stepped slice is checked twice in a row.
    for _ in range(2):
        self.assertEqual([5, 7, 9], list(numbers[5:10:2]))

    self.assertEqual(6, numbers[4:10:2][1])
    self.assertEqual([4, 7], list(numbers[4:10:3]))
    self.assertEqual([4, 7, 10], list(numbers[4:11:3]))
    self.assertEqual(17, numbers[-3])
    self.assertEqual([], list(numbers[999:]))
    self.assertEqual([17, 18, 19], list(numbers[-3:]))
    self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], list(numbers[:-10]))

    # Splitting at any position, in range or not, must give back the whole
    # sequence when both halves are joined.
    for split in range(-10, 30):
        head = list(numbers[:split])
        tail = list(numbers[split:])
        self.assertEqual(list(range(20)), head + tail)
def test_pipe_combination(self):
    """Check that pipes composed with ``|`` apply their stages left to right.

    Four combinations are covered: map | map, filter | map, a composed pipe
    applied to a single key of dict items, and a map followed by a dict of
    per-key functions.
    """
    # map | map: works as a pipe over an iterable and as a direct call on
    # a single value.
    items = [1, 2, 3, 4, 5]
    double = mapped(lambda a: a * 2)
    increment = mapped(lambda a: a + 1)
    pipe = double | increment
    self.assertEqual([a * 2 + 1 for a in items], list(items | pipe))
    self.assertEqual([a * 2 + 1 for a in items], [pipe(a) for a in items])

    # filter | map: works as a pipe over an iterable ...
    items = [1, 2, 3, 4, 5]
    only_even = filtered(lambda a: a % 2 == 0)
    increment = mapped(lambda a: a + 1)
    pipe = only_even | increment
    self.assertEqual(
        [a + 1 for a in items if a % 2 == 0],
        list(items | pipe),
    )
    # ... but calling it on single values is expected to raise TypeError.
    # The calls have to run inside the assertRaises context: handing
    # assertRaises an already-built list either lets the error escape the
    # assertion or passes vacuously (calling a list raises TypeError).
    # NOTE(review): if this assertion fails, the composed filter pipe does
    # not raise on a direct call, which the old form could not detect.
    with self.assertRaises(TypeError):
        for a in items:
            pipe(a)

    # A composed pipe applied to one key of each dict item; the other key
    # is left untouched.
    records = from_array([{"a": i, "b": i * 2} for i in range(10)])
    double = mapped(lambda a: a * 2)
    increment = mapped(lambda a: a + 1)
    pipe = double | increment
    self.assertEqual(
        [{"a": i * 2 + 1, "b": i * 2} for i in range(10)],
        list(records | {"a": pipe}),
    )

    # A map producing dicts, followed by a dict of per-key functions.
    items = [1, 2, 3, 4, 5]
    to_record = mapped(lambda a: {"a": a, "b": 2 * a})
    triple_a = {"a": lambda a: 3 * a}
    pipe = to_record | triple_a
    self.assertEqual(
        [{"a": i * 3, "b": i * 2} for i in range(1, 6)],
        list(items | pipe),
    )
def test_add_mul(self):
    """Check ``+`` (concatenation) and ``*`` (pairing) of two sources."""
    left = from_array([1, 2, 3, 4, 5])
    right = from_array([5, 4, 3, 2, 1, 0])

    # Addition chains the two sources one after the other.
    chained = left + right
    self.assertEqual(11, len(chained))
    self.assertEqual([1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0], list(chained))

    # Multiplication pairs items position by position; the result has the
    # length of the shorter operand.
    paired = left * right
    self.assertEqual(5, len(paired))
    expected_pairs = [(1, 5), (2, 4), (3, 3), (4, 2), (5, 1)]
    self.assertEqual(expected_pairs, list(paired))
def test_choice(self):
    """Check ``choice``: index-based selection and the hash of its result.

    Both the data and the indices may be given as sources or as plain lists.
    """
    expected = [18, 8, 0]

    # Source data, source indices.
    evens = from_array([2 * n for n in range(10)])
    indices = from_array([9, 4, 0])
    picked = choice(evens, indices)
    self.assertEqual(expected, list(picked))

    # Source data, list indices.
    self.assertEqual(
        expected,
        list(choice(from_array([2 * n for n in range(10)]), [9, 4, 0])),
    )

    # List data, list indices.
    self.assertEqual(
        expected,
        list(choice([2 * n for n in range(10)], [9, 4, 0])),
    )

    self.assertTrue(picked.has_length)
    self.assertTrue(picked.random_accessible)

    # The hash must change when either the data or the indices change.
    data_a = from_array(list(range(10)), hash_func=lambda seq: seq[0])
    data_b = from_array(list(range(1, 11)), hash_func=lambda seq: seq[0])
    index_a = from_array([1, 2, 3], hash_func=lambda seq: seq[0])
    index_b = from_array([2, 3, 4], hash_func=lambda seq: seq[0])
    self.assertNotEqual(
        choice(data_a, index_a).hash,
        choice(data_a, index_b).hash,
    )
    self.assertNotEqual(
        choice(data_a, index_a).hash,
        choice(data_b, index_a).hash,
    )

    # Same expectations when the indices are plain lists.
    index_a = [1, 2, 3]
    index_b = [2, 3, 4]
    self.assertNotEqual(
        choice(data_a, index_a).hash,
        choice(data_a, index_b).hash,
    )
    self.assertNotEqual(
        choice(data_a, index_a).hash,
        choice(data_b, index_a).hash,
    )
def test_count(self):
    """Check that ``count()`` gives a flat-mapped source a known length."""
    source = from_array([1, 2, 3, 4, 5])
    flat = source.flat_map(lambda a: [a])

    # Before counting, the flat-mapped source reports no length.
    self.assertFalse(flat.has_length)

    counted = flat.count()

    # After counting, the length is available and matches the returned count.
    self.assertTrue(flat.has_length)
    self.assertEqual(5, len(flat))
    self.assertEqual(len(flat), counted)
def test_random_permutation(self):
    """Check ``permutation`` with and without a fixed seed.

    Two unseeded permutations of the same source are expected to differ,
    while two permutations with the same seed must be identical.
    """
    source = from_array(list(range(1000)))

    first = permutation(source)
    second = permutation(source)
    self.assertNotEqual(list(first), list(second))

    # The source is range(1000), so 0 is a legitimate first element. The
    # lower bound is therefore inclusive; a strict bound fails spuriously
    # whenever the permutation happens to put 0 first.
    head = first[0]
    self.assertGreaterEqual(head, 0)
    self.assertLess(head, 1000)

    first = permutation(source, seed=42)
    second = permutation(source, seed=42)
    self.assertEqual(list(first), list(second))
def test_random_choice(self):
    """Check ``random_choice`` with and without a fixed seed.

    Two unseeded draws of 3 items from the same source are expected to
    differ, while two draws with the same seed must be identical.
    """
    source = from_array(list(range(1000)))

    first = random_choice(source, 3)
    second = random_choice(source, 3)
    self.assertNotEqual(list(first), list(second))

    # The source is range(1000), so 0 is a legitimate drawn value. The
    # lower bound is therefore inclusive; a strict bound fails spuriously
    # whenever 0 happens to be drawn first.
    head = first[0]
    self.assertGreaterEqual(head, 0)
    self.assertLess(head, 1000)

    first = random_choice(source, 3, seed=42)
    second = random_choice(source, 3, seed=42)
    self.assertEqual(list(first), list(second))
def test_simple_length(self):
    """Check length, indexing and iteration of the two basic constructors."""

    def check_one_to_five(source):
        # Shared expectations for a source holding the values 1..5.
        self.assertEqual(5, len(source))
        self.assertEqual(2, source[1])
        self.assertEqual([1, 2, 3, 4, 5], list(source))

    check_one_to_five(from_array([1, 2, 3, 4, 5]))
    check_one_to_five(from_items(1, 2, 3, 4, 5))
def test_flat_map(self):
    """Check ``flat_map`` / ``flat_mapped`` for ranges, sources and lists."""
    numbers = from_items(1, 2, 3)
    expanded = [0, 0, 1, 0, 1, 2]

    # Method form; the result has no known length.
    via_method = numbers.flat_map(lambda a: range(a))
    self.assertFalse(via_method.has_length)
    self.assertEqual(expanded, list(via_method))

    # Pipe form yields the same items.
    via_pipe = numbers | flat_mapped(lambda a: range(a))
    self.assertEqual(expanded, list(via_pipe))

    # flat_mapped also flattens when the function returns a source object.
    via_source = numbers | flat_mapped(lambda a: from_array(list(range(a))))
    self.assertEqual(expanded, list(via_source))

    # An identity function flattens a source of lists.
    nested = from_items([0, 1], [2, 3, 4])
    flattened = nested | flat_mapped(lambda a: a)
    self.assertEqual([0, 1, 2, 3, 4], list(flattened))
def test_window(self):
    """Check ``windowed`` for several window sizes and both ``drop_first`` modes.

    Each case checks the reported length and the produced tuples; most cases
    also compare slicing and indexing against the materialised list.
    """

    def check_slices_and_indices(m, name):
        # Slicing from any start (in range or not) and indexing at every
        # valid position must agree with the fully materialised list.
        for i in range(-10, 20):
            expected_tail = list(m)[i:]
            self.assertEqual(expected_tail, list(m[i:]), f"{name}: i is {i}")
        for i in range(-len(m), len(m)):
            expected_item = list(m)[i]
            self.assertEqual(expected_item, m[i], f"{name}: i is {i}")

    digits = from_array(list(range(10)))

    # Width 3, incomplete leading windows dropped: 8 full windows.
    win = digits | windowed(3, drop_first=True)
    self.assertTrue(win.has_length)
    self.assertEqual(8, len(win))
    self.assertEqual(len(win), len(list(win)))
    self.assertEqual([(i - 2, i - 1, i) for i in range(2, 10)], list(win))
    check_slices_and_indices(win, "m1")

    # Width 3, leading windows kept: missing positions are None.
    win = digits | windowed(3, drop_first=False)
    self.assertTrue(win.has_length)
    self.assertEqual(10, len(win))
    padded = []
    for i in range(0, 10):
        two_back = i - 2 if i >= 2 else None
        one_back = i - 1 if i >= 1 else None
        padded.append((two_back, one_back, i))
    self.assertEqual(padded, list(win))
    check_slices_and_indices(win, "m2")

    # Width 1: one single-item tuple per element ...
    win = digits | windowed(1, drop_first=True)
    self.assertEqual(10, len(win))
    self.assertEqual(len(win), len(list(win)))
    self.assertEqual([(i, ) for i in range(10)], list(win))
    check_slices_and_indices(win, "m3")

    # ... and drop_first makes no difference at that width.
    win_kept = digits | windowed(1, drop_first=False)
    self.assertEqual(10, len(win_kept))
    self.assertEqual(list(win), list(win_kept))
    check_slices_and_indices(win_kept, "m4")

    # Width larger than the source, incomplete windows dropped: empty.
    win = digits | windowed(100, drop_first=True)
    self.assertEqual(0, len(win))
    self.assertEqual(len(win), len(list(win)))

    # Width larger than the source, windows kept: left-padded with None.
    win = digits | windowed(100, drop_first=False)
    self.assertEqual(10, len(win))
    none_padding = (None, ) * 100
    left_padded = [
        (none_padding + tuple(range(i + 1)))[-100:] for i in range(10)
    ]
    self.assertEqual(left_padded, list(win))
    # Iterating twice must give the same items.
    self.assertEqual(list(win), list(win))
    check_slices_and_indices(win, "m5")
from flowder.pipes import to_dict, select from flowder.processors import Aggregator from flowder.source import Source from flowder.source.base import flat_mapped from flowder.utils import from_array s = from_array(list(range(1, 11))) for a, b in zip(s, range(1, 11)): assert a == b fm = s | flat_mapped(lambda n: list(range(n))) reference = [a for b in range(1, 11) for a in range(b)] for a, b in zip(fm, reference): assert a == b s = from_array([(n, 10 - n) for n in range(0, 11)]) a = s | select(0) b = s | select(1) z = a * b for a, b in zip(z, s): assert a == b data = [] class TestProcess(Aggregator): def feed_data(self, d: Source): for item in d:
def test_concat(self):
    """Check that ``concat`` chains several sources in argument order."""
    parts = [
        from_array([1, 2, 3]),
        from_array([4, 5, 6]),
        from_array([7, 8, 9]),
    ]
    joined = concat(*parts)
    self.assertEqual(list(range(1, 10)), list(joined))