def test_hash2(self):
    """Check how the ``hash`` of a mapped source relates to its pipeline.

    The pipe form and the ``map`` method must agree, a mapped source must
    not share its parent's hash, and different ``dependencies`` lists must
    produce different hashes.
    """
    base = from_items(1, 2, 3, 4, 5)

    # ``source | mapped(f)`` and ``source.map(f)`` are expected to hash alike.
    via_pipe = base | mapped(lambda a: a * 2)
    via_method = base.map(lambda a: a * 2)
    self.assertEqual(via_pipe.hash, via_method.hash)
    self.assertNotEqual(base.hash, via_pipe.hash)

    # Declared dependencies must be reflected in the hash.
    doubled = base | mapped(lambda a: a * 2, dependencies=[2])
    tripled = base | mapped(lambda a: a * 3, dependencies=[3])
    self.assertNotEqual(doubled.hash, tripled.hash)
def test_for_array_pipe(self):
    """Check that plain iterables can be piped into ``mapped``.

    A list on the left-hand side must give a result with ``has_length``
    set, while a generator must give one without it; both must yield the
    doubled values.
    """
    numbers = [1, 2, 3, 4, 5]
    doubled = [2 * n for n in numbers]

    # A list has a known size, so the mapped result reports a length.
    from_list = numbers | mapped(lambda x: 2 * x)
    self.assertTrue(from_list.has_length)
    self.assertEqual(doubled, list(from_list))

    # A generator expression has no size to report.
    from_generator = (a for a in numbers) | mapped(lambda x: 2 * x)
    self.assertFalse(from_generator.has_length)
    self.assertEqual(doubled, list(from_generator))
def test_mapped_dict(self):
    """Check dict patterns applied to a source whose items are dicts.

    A dict of ``mapped`` pipes, a dict of bare callables, and ``mapped``
    wrapping a dict of callables must all transform each key separately
    and give the same items.
    """
    numbers = from_items(1, 2, 3, 4, 5)
    ones = from_items(1, 1, 1, 1, 1)
    records = zipped(numbers, ones) | mapped(lambda a: {"a": a[0], "b": a[1]})
    self.assertEqual({"a": 3, "b": 1}, records[2])

    with_pipes = records | {
        "a": mapped(lambda a: a + 1),
        "b": mapped(lambda a: a * 2),
    }
    with_callables = records | {
        "a": lambda a: a + 1,
        "b": lambda a: a * 2,
    }
    with_mapped_dict = records | mapped({
        "a": lambda a: a + 1,
        "b": lambda a: a * 2,
    })

    # "a" goes 1..5 -> 2..6 and "b" goes 1 -> 2 for every record.
    expected = [{"a": a, "b": 2} for a in range(2, 7)]
    self.assertEqual(expected, list(with_pipes))
    self.assertEqual(list(with_pipes), list(with_callables))
    self.assertEqual(list(with_pipes), list(with_mapped_dict))
def test_pipe_combination(self):
    """Check that pipes combined with ``|`` behave like their parts in order.

    Covers four cases: two mapped pipes, a filter followed by a map, a
    combined pipe used as the value of a dict pattern, and a mapped pipe
    combined with a dict pattern.
    """
    items = [1, 2, 3, 4, 5]

    # map | map: usable on a whole list and as a per-value callable.
    m1 = mapped(lambda a: a * 2)
    m2 = mapped(lambda a: a + 1)
    m = m1 | m2
    self.assertEqual([a * 2 + 1 for a in items], list(items | m))
    self.assertEqual([a * 2 + 1 for a in items], [m(a) for a in items])

    # filter | map: usable on a list, but calling it per value must raise.
    p1 = filtered(lambda a: a % 2 == 0)
    p2 = mapped(lambda a: a + 1)
    m = p1 | p2
    self.assertEqual([a + 1 for a in items if a % 2 == 0], list(items | m))
    # The calls are deferred with a lambda so they run inside assertRaises.
    # Passing an already-built list would make the check pass merely because
    # a list object is not callable.
    self.assertRaises(TypeError, lambda: [m(a) for a in items])

    # A combined pipe as the value of a dict pattern touches only that key.
    source = from_array([{"a": i, "b": i * 2} for i in range(10)])
    p1 = mapped(lambda a: a * 2)
    p2 = mapped(lambda a: a + 1)
    m = p1 | p2
    self.assertEqual(
        [{"a": i * 2 + 1, "b": i * 2} for i in range(10)],
        list(source | {"a": m}),
    )

    # A mapped pipe can be combined with a dict pattern on its right.
    p1 = mapped(lambda a: {"a": a, "b": 2 * a})
    p2 = {"a": lambda a: 3 * a}
    m = p1 | p2
    self.assertEqual(
        [{"a": i * 3, "b": i * 2} for i in range(1, 6)],
        list(items | m),
    )
def test_to_dict(self):
    """Check that ``to_dict`` turns zipped tuples into keyed dicts.

    ``to_dict("a", "b")`` must match the hand-written mapping, and a wrong
    number of keys or a repeated key must raise ``AssertionError``.
    """
    numbers = from_items(1, 2, 3, 4, 5)
    ones = from_items(1, 1, 1, 1, 1)

    as_dicts = zipped(numbers, ones) | to_dict("a", "b")
    by_hand = zipped(numbers, ones) | mapped(lambda a: {"a": a[0], "b": a[1]})
    self.assertEqual(list(by_hand), list(as_dicts))

    # Key count differs from the tuple size: build and consume the pipeline.
    with self.assertRaises(AssertionError):
        list(zipped(numbers, ones) | to_dict("a"))
    with self.assertRaises(AssertionError):
        list(zipped(numbers, ones) | to_dict("a", "b", "c"))

    # Repeated key: only building the pipeline is exercised here.
    with self.assertRaises(AssertionError):
        zipped(numbers, ones) | to_dict("a", "a")
def test_decorator(self):
    """Check that declared dependencies, and only those, alter the hash.

    Functions decorated with ``depend(k)`` for different ``k`` and lambdas
    given different ``dependencies`` must hash differently; lambdas with
    no declared dependencies must hash the same.
    """
    def scaled_by(k):
        @depend(k)
        def scale(a):
            return a * k

        return scale

    source = from_items(1, 2, 3, 4, 5)

    # Dependencies declared through the decorator.
    twice = source | mapped(scaled_by(2))
    thrice = source | mapped(scaled_by(3))
    self.assertNotEqual(twice.hash, thrice.hash)

    # Dependencies declared through the keyword argument.
    twice = source | mapped(lambda a: a * 2, dependencies=[2])
    thrice = source | mapped(lambda a: a * 3, dependencies=[3])
    self.assertNotEqual(twice.hash, thrice.hash)

    # No dependencies declared: the differing lambda bodies are not seen.
    twice = source | mapped(lambda a: a * 2)
    thrice = source | mapped(lambda a: a * 3)
    self.assertEqual(twice.hash, thrice.hash)
from flowder.utils.image import to_image

# Example: reading a CSV file and a directory of images as sources.

# Without a header row each item is asserted to be a plain tuple.
iris = csv("data/IRIS.csv", header=None)
for data in iris:
    assert isinstance(data, tuple)
    first = iris[0]
    # Indexing and iteration must agree on the first row; the comparison is
    # asserted so that a mismatch is actually reported.
    assert all(a == b for a, b in zip(first, data))
    break

# NOTE(review): this loop expects (index, dict) pairs from the same ``iris``
# that yielded tuples above -- a re-creation of ``iris`` with different
# options may be missing from this fragment; confirm against the full file.
for index, values in iris:
    assert np.issubdtype(type(index), np.integer)
    assert isinstance(values, dict)

# A directory source yields ``Path`` objects and can be filtered by suffix.
images_dir_path = Path("data/celebA/img_align_celeba")
d = directory(images_dir_path)
for p in d | filtered(lambda a: a.suffix == ".jpg"):
    assert isinstance(p, Path)
# Second pass over an identical pipeline (presumably checks that the
# directory source can be iterated again -- confirm).
for p in d | filtered(lambda a: a.suffix == ".jpg"):
    assert isinstance(p, Path)

# Whitespace-separated annotation file; the regex separator is written as a
# raw string so that ``\s`` is not treated as a string escape.
anno = csv("data/celebA/list_attr_celeba.txt", header=1, sep=r"\s+")
assert len(anno) == 8

# Column 0 is mapped to a path under the image directory and then piped
# through ``to_image()``.
imgs = anno | select(0) | mapped(lambda name: images_dir_path / name) | to_image()
# img = Field("img", process=mean(), postprocess=whitening())
for img in imgs:
    assert isinstance(img, Image.Image)
def wrapper(f):
    """Return ``f`` wrapped by ``mapped`` together with ``dependencies``.

    ``dependencies`` is not a parameter; it is taken from the surrounding
    scope and passed as the second positional argument to ``mapped``.
    """
    pipe = mapped(f, dependencies)
    return pipe
# Example: combining two line-based sources into a parallel corpus.
# - ``ja * en`` combines the two sources; the result is asserted to have the
#   same length as ``ja`` and to yield 2-tuples of ``str``.
# - Piping the combination through ``mapped`` produces dicts with the keys
#   "ja" and "en".
# - ``split("|||")`` followed by ``select(3)`` picks one column of a
#   delimiter-separated file; the selected values are asserted to be ``str``.
# NOTE(review): the statements below have lost their line breaks, and the
# final ``for japanese_column in dataset:`` has no visible body -- the rest
# of this example appears to be missing from this view.
# NOTE(review): assigning to ``zipped`` would shadow a ``zipped`` helper if
# one is in scope here -- confirm.
ja = lines("data/kftt.ja") en = lines("data/kftt.en") zipped = ja * en assert len(zipped) == len(ja) for data in zipped: assert isinstance(data, tuple) assert len(data) == 2 j, e = data assert isinstance(j, str) assert isinstance(e, str) break dataset = ja * en | mapped(lambda t: {"ja": t[0], "en": t[1]}) for example in dataset: assert isinstance(example, dict) assert "ja" in example assert isinstance(example["ja"], str) assert isinstance(example["en"], str) special_delimiter_text = lines("data/special_delimiter.txt") | split("|||") for third_column in special_delimiter_text | select(3): assert isinstance(third_column, str) break dataset = special_delimiter_text | select(3) for japanese_column in dataset: pass for japanese_column in dataset:
import pathlib

from flowder.pipes import split, select
from flowder.source.base import mapped
from flowder.utils import lines

# Example: a text file as a source of lines, with map / split / select pipes.

ls = lines("data/kftt.ja")
assert len(ls) == 10, "there should be 10 lines"

# Iterating the source itself gives the lines as strings.
for s in ls:
    assert isinstance(s, str), "Source iterate the raw values"
    break

# Iterating a mapped source gives the mapped values (here the line lengths),
# so the failure message names the mapped values rather than the raw ones.
for s in ls | mapped(lambda x: len(x)):
    assert isinstance(s, int), "mapped Source iterates the mapped values"
    break

# ``split()`` with no argument turns each line into a list of strings.
for spl in ls | split():
    assert isinstance(spl, list)
    assert isinstance(spl[0], str)
    break

# Splitting on an explicit delimiter, then selecting column 3.
delimiter = "|||"
special_delimiter_text = lines("data/special_delimiter.txt") | split(delimiter)
for third_column in special_delimiter_text | select(3):
    assert isinstance(third_column, str)
    break
def add_sos(sos_token=2) -> Mapped:
    """Return a ``mapped`` pipe wrapping ``_AddToken(sos_token, head=True)``.

    Args:
        sos_token: Value passed to ``_AddToken``; defaults to 2.

    Returns:
        The pipe built by ``mapped`` with the dependency label ``"add_sos"``.
    """
    token_adder = _AddToken(sos_token, head=True)
    return mapped(token_adder, dependencies=["add_sos"])
def add_eos(eos_token=3) -> Mapped:
    """Return a ``mapped`` pipe wrapping ``_AddToken(eos_token, head=False)``.

    Args:
        eos_token: Value passed to ``_AddToken``; defaults to 3.

    Returns:
        The pipe built by ``mapped`` with the dependency label ``"add_eos"``.
    """
    # The label must be specific to this function: reusing another pipe's
    # label would give both pipes the same declared dependencies.
    # NOTE(review): ``eos_token`` itself is not part of the dependencies --
    # confirm whether pipes built with different tokens should hash apart.
    return mapped(_AddToken(eos_token, head=False), dependencies=["add_eos"])
def test_mapped(self):
    """Check ``mapped`` through the ``|`` operator in its supported forms.

    Covers explicit pipes, bare callables, chained and pre-combined pipes,
    rejection of non-pipe operands, and tuple patterns (with ``None`` as a
    pass-through slot) applied to zipped sources.
    """
    source = from_items(1, 2, 3, 4, 5)

    # The right operand must be a Pipe or a pattern.
    with self.assertRaises(TypeError):
        source | 42

    # A single mapping, written explicitly and as a bare callable.
    explicit = source | mapped(lambda a: a + 1)
    implicit = source | (lambda a: a + 1)
    self.assertEqual([2, 3, 4, 5, 6], list(explicit))
    self.assertEqual(explicit.parents, [source])
    self.assertEqual(len(explicit), 5)
    self.assertEqual(list(explicit), list(implicit),
                     "callable is assumed to be mapped implicitly")

    # Two mappings: every mix of explicit/implicit, plus a pre-combined pipe.
    chained = source | mapped(lambda a: a + 1) | mapped(lambda a: a * 2)
    implicit_first = source | (lambda a: a + 1) | mapped(lambda a: a * 2)
    implicit_second = source | mapped(lambda a: a + 1) | (lambda a: a * 2)
    precombined = source | (mapped(lambda a: a + 1) | mapped(lambda a: a * 2))
    self.assertEqual([4, 6, 8, 10, 12], list(chained))
    self.assertEqual(5, len(chained))
    for variant in (implicit_first, implicit_second, precombined):
        self.assertEqual(list(chained), list(variant))

    # A pipe cannot be combined with a non-pipe on either side.
    with self.assertRaises(TypeError):
        mapped(lambda a: a + 1) | 42
    with self.assertRaises(TypeError):
        42 | mapped(lambda a: a + 1)

    # Tuple patterns apply one entry per position of each zipped item.
    numbers = from_items(1, 2, 3, 4, 5)
    ones = from_items(1, 1, 1, 1, 1)
    pairs = zipped(numbers, ones)

    both_pipes = pairs | (mapped(lambda a: a + 1), mapped(lambda a: a - 1))
    callable_first = pairs | ((lambda a: a + 1), mapped(lambda a: a - 1))
    callable_second = pairs | (mapped(lambda a: a + 1), (lambda a: a - 1))
    both_callables = pairs | ((lambda a: a + 1), (lambda a: a - 1))
    self.assertEqual([(n, 0) for n in range(2, 7)], list(both_pipes))
    for variant in (callable_first, callable_second, both_callables):
        self.assertEqual(list(both_pipes), list(variant))

    # ``None`` leaves its position untouched.
    result = pairs | (mapped(lambda a: a + 1), None) | (mapped(lambda a: a * 2), None)
    self.assertEqual([(n * 2, 1) for n in range(2, 7)], list(result))

    result = pairs | (None, mapped(lambda a: a + 1))
    self.assertEqual([(n, 2) for n in range(1, 6)], list(result))

    result = pairs | (mapped(lambda a: a + 1), None) | (None, mapped(lambda a: a * 2))
    self.assertEqual([(n, 2) for n in range(2, 7)], list(result))