def test_worker_killed(nworkers: int, sig: int) -> None:
    """Test what happens when 'funsies worker' gets killed."""
    # std
    import os

    def kill_funsies_worker(*inp: bytes) -> bytes:
        pid = os.getppid()
        os.kill(pid, sig)
        time.sleep(1.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        s1 = f.reduce(
            kill_funsies_worker, b"bla bla", b"bla bla", opt=f.options(timeout=5)
        )
        s1b = f.morph(cap, s1)
        f.execute(s1b)

        if nworkers == 1:
            # no other workers to pick up the slack
            with pytest.raises(TimeoutError):
                f.wait_for(s1b, timeout=1)
        else:
            # everything is ok
            f.wait_for(s1b, timeout=5)
            assert f.take(s1b) == b"WHAT"

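# NOTE: `wait_for_workers` is a helper defined elsewhere in the test suite and
# not shown in this section. A minimal sketch of what it is assumed to do,
# polling rq's worker registry until the expected number of workers has
# registered (the names and the polling budget here are assumptions, not the
# verbatim implementation):
#
#     from rq import Worker
#
#     def wait_for_workers(db: Redis[bytes], nworkers: int) -> None:
#         """Block until `nworkers` rq workers are registered on `db`."""
#         for _ in range(100):
#             if len(Worker.all(connection=db)) == nworkers:
#                 return
#             time.sleep(0.1)
#         raise RuntimeError("workers did not start in time")
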
def test_double_execution(nworkers: int) -> None:
    """Test multiple executions of the same task."""
    # This test will fail if a job is re-executed multiple times.
    # external
    from rq.job import get_current_job

    def track_runs(inp: bytes) -> bytes:
        job = get_current_job()
        db: Redis[bytes] = job.connection
        val = db.incrby("sentinel", 1)
        time.sleep(0.5)
        return str(val).encode()

    with f.ManagedFun(nworkers=nworkers):
        # wait_for_workers(db, nworkers)
        dat = f.put(b"bla bla")
        step1 = f.morph(track_runs, dat)

        step1a = f.shell(
            "cat file1",
            inp=dict(file1=step1),
        )

        step1b = f.shell(
            "cat file2",
            inp=dict(file2=step1),
        )

        f.execute(step1a)
        f.execute(step1b)
        f.wait_for(step1a, timeout=10.0)
        f.wait_for(step1b, timeout=10.0)
        assert f.take(step1a.stdout) == b"1"

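# `track_runs` bumps a shared Redis counter on every invocation, and INCRBY
# returns the post-increment value, so the first (and only expected) run
# yields b"1". If the morph were ever re-executed — despite two downstream
# shell steps depending on it and two execute() calls racing for it — the
# final assert would see b"2" or more and fail.
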
def test_job_killed(nworkers: int, sig: int) -> None:
    """Test what happens when 'funsies worker' is ok but its job gets killed."""
    # std
    import os

    def kill_self(*inp: bytes) -> bytes:
        pid = os.getpid()
        os.kill(pid, sig)
        time.sleep(2.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        s1 = f.reduce(kill_self, b"bla bla", b"bla bla", opt=f.options(timeout=3))
        s1b = f.morph(cap, s1)
        f.execute(s1b)

        # error
        f.wait_for(s1b, timeout=1)
        err = f.take(s1b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.KilledBySignal

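# `nworkers` and `sig` in the two tests above are assumed to be pytest
# parameters supplied elsewhere in the module, e.g. along these lines (the
# exact decorators are not shown in this section):
#
#     import signal
#
#     @pytest.mark.parametrize("sig", [signal.SIGTERM, signal.SIGKILL])
#     @pytest.mark.parametrize("nworkers", [1, 2, 8])
#     def test_worker_killed(nworkers: int, sig: int) -> None:
#         ...
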
def test_data_race(nworkers: int) -> None:
    """Test a data race when execute calls are interleaved."""
    with f.ManagedFun(nworkers=nworkers):
        dat = f.put(b"bla bla")
        step1 = f.morph(lambda x: x.decode().upper().encode(), dat)
        step2 = f.shell(
            "cat file1 file2; grep 'bla' file2 file1 > file3; date >> file3",
            inp=dict(file1=step1, file2=dat),
            out=["file2", "file3"],
        )

        f.execute(step1)
        f.execute(step2)
        f.wait_for(step1, timeout=20.0)
        f.wait_for(step2, timeout=20.0)

def test_dynamic_dump() -> None:
    """Test whether a dynamic DAG gets graphed properly."""

    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        a = a.split()
        b = b.split()
        out = []
        for ia, ib in zip(a, b):
            out += [
                {
                    "sum": int(ia.decode()) + int(ib.decode()),
                    "product": int(ia.decode()) * int(ib.decode()),
                }
            ]
        return out

    def apply(inp: Artefact[dict[str, Any]]) -> Artefact[str]:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact[str]]) -> Artefact[bytes]:
        def enc(inp: str) -> bytes:
            return inp.encode()

        out = [funsies.morph(enc, x, out=Encoding.blob) for x in inp]
        return funsies.utils.concat(*out)

    with funsies.ManagedFun(nworkers=1) as db:
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")
        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        outputs = funsies.morph(lambda x: x, outputs)

        nodes, artefacts, labels, links = _graphviz.export(db, [outputs.hash])
        assert len(artefacts) == 4  # subdag parents not yet generated

        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=1.0)

        nodes, artefacts, labels, links = _graphviz.export(db, [outputs.hash])
        assert len(artefacts) == 22  # subdag parents generated
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"

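# The expected bytes above decompose pairwise over the two inputs: the sums of
# ("1 2 3 4 5", "11 10 11 10 11") are 12, 12, 14, 14, 16 and the products are
# 11, 20, 33, 40, 55, so the concatenation is "12//11" + "12//20" + "14//33"
# + "14//40" + "16//55".
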
def test_artefact_disk_distributed() -> None:
    """Test whether artefacts on disk work on different nodes."""
    # funsies
    import funsies as f

    with tempfile.TemporaryDirectory() as td:
        with f.ManagedFun(nworkers=1, data_url=f"file://{td}"):
            dat = f.put(b"bla bla")
            step1 = f.morph(lambda x: x.decode().upper().encode(), dat)
            step2 = f.shell("cat file1 file2", inp=dict(file1=step1, file2=dat))
            step2b = f.shell("cat file1", inp=dict(file1=step1))

            f.execute(step2)
            f.wait_for(step2, 1.0)
            out = f.take(step2.stdout)
            assert out == b"BLA BLAbla bla"

            f.execute(step2b)
            f.wait_for(step2b, 1.0)
            out = f.take(step2b.stdout)
            assert out == b"BLA BLA"

def test_waiting_on_map_reduce() -> None:
    """Test waiting on the (linked) result of map-reduce."""

    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        a = a.split()
        b = b.split()
        out = []
        for ia, ib in zip(a, b):
            out += [
                {
                    "sum": int(ia.decode()) + int(ib.decode()),
                    "product": int(ia.decode()) * int(ib.decode()),
                }
            ]
        return out

    def apply(inp: Artefact) -> Artefact:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: y.encode(), x, out=Encoding.blob) for x in inp
        ]
        return funsies.utils.concat(*out)

    with funsies.ManagedFun(nworkers=1):
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")
        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=1.0)
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"

def test_raising_funsie() -> None:
    """Test funsie that raises an error.

    This test is specifically designed to catch the bug fixed in fa9af6a4,
    where funsies that raised did not release their locks, leading to a race
    condition.
    """

    def raising_fun(*inp: str) -> bytes:
        raise RuntimeError("this funsie raises.")

    with f.ManagedFun(nworkers=2):
        s0a = f.morph(lambda x: x, "bla blabla")
        s0b = f.morph(lambda x: x, "blala")
        s1 = f.reduce(raising_fun, "bla bla", s0a, s0b, strict=True)
        f.execute(s1)
        f.wait_for(s1, timeout=2)
        with pytest.raises(UnwrapError):
            _ = f.take(s1)

        s2 = f.morph(lambda x: x, s1)
        f.execute(s2)
        f.wait_for(s2, timeout=0.5)

def test_timeout_deadlock() -> None:
    """Test funsies that time out.

    Here we explicitly check whether dependents are still enqueued or whether
    the whole thing deadlocks.
    """

    def timeout_fun(*inp: str) -> bytes:
        time.sleep(3.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.capitalize()

    with f.ManagedFun(nworkers=2):
        # Test when a python function times out.
        s1 = f.reduce(timeout_fun, "bla bla", "bla bla", opt=f.options(timeout=1))
        s1b = f.morph(cap, s1)

        # Test when a shell command times out.
        s2 = f.shell("sleep 20", "echo 'bla bla'", opt=f.options(timeout=1))
        s2b = f.morph(cap, s2.stdouts[1])
        f.execute(s1b, s2b)

        # Check the error for the reduce step.
        f.wait_for(s1b, timeout=1.5)
        err = f.take(s1b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.JobTimedOut
        assert err.source == s1.parent

        # Check the error for the shell step.
        f.wait_for(s2b, timeout=1.5)
        err = f.take(s2b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.JobTimedOut
        assert err.source == s2.hash

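# The two `err.source` asserts above differ deliberately: for the python
# reduce, the failing operation is the parent operation of the artefact `s1`,
# hence `s1.parent`, while the shell wrapper `s2` is itself addressed by its
# own hash, hence `s2.hash`. (This reading of the funsies data model is
# inferred from the asserts themselves, not from separate documentation.)
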
def test_cleanup() -> None:
    """Test truncation."""
    # std
    import os
    from signal import SIGKILL

    def kill_self(*inp: bytes) -> bytes:
        pid = os.getpid()
        os.kill(pid, SIGKILL)
        time.sleep(2.0)
        return b"what"

    with f.ManagedFun(nworkers=1) as db:
        inp = "\n".join([f"{k}" for k in range(10)]).encode()
        fun = f.reduce(kill_self, inp)
        f.execute(fun)
        time.sleep(0.5)

        key1 = db.get(f._constants.join(f._constants.OPERATIONS, fun.parent, "owner"))
        f._context.cleanup_funsies(db)
        key2 = db.get(f._constants.join(f._constants.OPERATIONS, fun.parent, "owner"))

        assert key1 is not None
        assert key2 is None

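# What the asserts above check: after the worker's job is SIGKILLed, the
# operation's "owner" key is still present in Redis (key1), i.e. the dead
# worker still appears to hold the op; `cleanup_funsies` is expected to
# truncate that stale ownership, so the same key is gone afterwards (key2).
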
def test_nested_map_reduce(nworkers: int) -> None:
    """Test nested map-reduce."""
    # ------------------------------------------------------------------------
    # Inner
    def sum_inputs(*inp: int) -> int:
        out = 0
        for el in inp:
            out += el
        return out

    def split_inner(inp: str) -> list[int]:
        a = inp.split(" ")
        return [int(el) for el in a]

    def apply_inner(inp: Artefact) -> Artefact:
        return funsies.reduce(sum_inputs, inp, 1)

    def combine_inner(inp: Sequence[Artefact]) -> Artefact:
        return funsies.reduce(sum_inputs, *inp)

    # ------------------------------------------------------------------------
    # Outer
    def split_outer(inp: list[str], fac: int) -> list[str]:
        out = [x + f" {fac}" for x in inp]
        return out

    def apply_outer(inp: Artefact) -> Artefact:
        outputs = dynamic.sac(
            split_inner,
            apply_inner,
            combine_inner,
            inp,
            out=Encoding.json,
        )
        return outputs

    def combine_outer(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: f"{y}".encode(), x, out=Encoding.blob)
            for x in inp
        ]
        return funsies.utils.concat(*out, join=b",,")

    with funsies.ManagedFun(nworkers=nworkers):
        num1 = funsies.put("1 2 3 4 5")
        outputs = dynamic.sac(
            split_inner, apply_inner, combine_inner, num1, out=Encoding.json
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=30.0)
        assert funsies.take(outputs) == 20

        # Now try the nested one
        num = funsies.put(["1 2", "3 4 7", "10 12", "1"])
        factor = funsies.put(-2)

        # split    -> 1 2 -2 | 3 4 7 -2 | 10 12 -2 | 1 -2
        # apply    -> split2   -> 1,2,-2 | 3,4,7,-2 | 10,12,-2 | 1,-2
        #             apply2   -> 2,3,-1 | 4,5,8,-1 | 11,13,-1 | 2,-1
        #             combine2 -> 4 | 16 | 23 | 1
        # combine  -> 4,,16,,23,,1
        ans = b"4,,16,,23,,1"
        outputs = dynamic.sac(
            split_outer,
            apply_outer,
            combine_outer,
            num,
            factor,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=30.0)
        assert funsies.take(outputs) == ans