def test_builder_init(env):
    """Check how builders get their name and which names the runtime knows.

    An explicit ``name`` wins over the function name, a bare ``@builder()``
    falls back to the function name, and a builder with neither a function
    nor a usable name is rejected with ``ValueError``.
    """

    @builder(name="foo")
    def bar(conf):
        """Test doc"""
        return conf

    # The decorated object keeps the wrapped function's metadata.
    assert bar.name == "foo"
    assert bar.__name__ == "bar"
    assert bar.__doc__ == "Test doc"

    @builder()
    def baz(conf):
        return conf

    assert baz.name == "baz"
    assert baz.__name__ == "baz"

    # Neither a function nor a name was given.
    with pytest.raises(ValueError,
                       match=".*Provide at leas one of fn and name.*"):
        Builder(None)

    # A lambda has no usable name of its own.
    with pytest.raises(ValueError,
                       match=".*is not a valid name for Builder.*"):
        Builder(lambda cfg: None)

    runtime = env.test_runtime()
    runtime.register_builder(Builder(None, "b1"))

    registry = runtime._builders
    for expected_name in ("b1", "foo"):
        assert expected_name in registry
    # ``bar`` was declared under the name "foo", so "bar" must be absent.
    assert "bar" not in registry
    assert "baz" in registry
def test_builder_remove_inputs(env):
    """Drop ``col2(1)`` with ``drop_inputs=True`` and check all related jobs.

    ``col2`` and ``col3`` both read ``col1``; ``col4`` reads ``col2``. After
    the drop every one of the four jobs is expected to be ``DETACHED``.
    """
    runtime = env.test_runtime()

    def b1(c):
        dep = col1(c)
        yield
        return dep.value

    def b2(c):
        dep = col2(c)
        yield
        return dep.value

    col1 = runtime.register_builder(Builder(lambda c: c, "col1"))
    col2 = runtime.register_builder(Builder(b1, "col2"))
    col3 = runtime.register_builder(Builder(b1, "col3"))
    col4 = runtime.register_builder(Builder(b2, "col4"))

    for target in (col4, col3):
        runtime.compute(target(1))

    runtime.drop(col2(1), drop_inputs=True)

    for col in (col1, col2, col3, col4):
        assert runtime.get_state(col(1)) == JobState.DETACHED
def test_db_run_stats(env):
    """Check ``db.get_run_stats`` for a builder with data and for an empty one.

    Four entries of ``col1`` are stored with computation times 1..4, so the
    expected average is 2.5 and the expected deviation is about 1.29099.
    ``col2`` has no entries and must report ``None`` statistics.
    """
    runtime = env.test_runtime()
    runtime.configure_executor(heartbeat_interval=1)
    executor = runtime.start_executor()

    col1 = runtime.register_builder(Builder(None, "col1"))
    _ = runtime.register_builder(Builder(None, "col2"))

    announced = [col1(cfg) for cfg in ("a", "b", "c", "d", "e")]
    assert runtime.db.announce_entries(executor.id, announced, [])
    assert runtime.db.get_entry_state(col1.name, make_key("a")) == "announced"
    runtime.db._dump()

    # Only "a".."d" receive a value; "e" is announced but never filled in.
    for comp_time, cfg in enumerate("abcd", start=1):
        raw_entry = make_raw_entry(
            runtime, col1, cfg, "value", comp_time=comp_time)
        runtime.db.set_entry_values(executor.id, [raw_entry])

    stats = runtime.db.get_run_stats("col1")
    assert stats["avg"] == pytest.approx(2.5)
    assert stats["stdev"] == pytest.approx(1.29099, 0.00001)
    assert stats["count"] == 4

    stats = runtime.db.get_run_stats("col2")
    assert stats["avg"] is None
    assert stats["stdev"] is None
    assert stats["count"] == 0
def test_builder_remove_inputs(env):
    """Remove ``col2(1)`` with ``remove_inputs=True`` and check related entries.

    ``col2`` and ``col3`` both read ``col1``; ``col4`` reads ``col2``. After
    the removal none of the four entries may have a state.
    """
    runtime = env.test_runtime()

    def b1(c):
        dep = col1(c)
        yield
        return dep.value

    def b2(c):
        dep = col2(c)
        yield
        return dep.value

    col1 = runtime.register_builder(Builder(lambda c: c, "col1"))
    col2 = runtime.register_builder(Builder(b1, "col2"))
    col3 = runtime.register_builder(Builder(b1, "col3"))
    col4 = runtime.register_builder(Builder(b2, "col4"))

    for target in (col4, col3):
        runtime.compute(target(1))

    runtime.remove(col2(1), remove_inputs=True)

    for col in (col1, col2, col3, col4):
        assert runtime.get_entry_state(col(1)) is None
def test_builder_drop(env):
    """Exercise ``drop_builder`` on an input builder and on its consumer.

    ``col2`` reads ``col1``. The test drops ``col1``, then ``col2``, then
    ``col2`` together with its inputs, checking job states after each step.
    """
    runtime = env.test_runtime()

    def b2(c):
        dep = col1(c)
        yield
        return dep.value

    col1 = runtime.register_builder(Builder(lambda c: c, "col1"))
    col2 = runtime.register_builder(Builder(b2, "col2"))

    def check_states(col1_1, col2_1, col2_2):
        # ``col2(2)`` is never computed in this test.
        assert runtime.get_state(col1(1)) == col1_1
        assert runtime.get_state(col2(1)) == col2_1
        assert runtime.get_state(col2(2)) == col2_2

    # Dropping the input builder.
    runtime.compute(col2(1))
    runtime.drop_builder("col1")
    check_states(JobState.DETACHED, JobState.DETACHED, JobState.DETACHED)

    # Dropping only the consumer: the input is expected to stay finished.
    runtime.compute(col2(1))
    runtime.drop_builder("col2")
    check_states(JobState.FINISHED, JobState.DETACHED, JobState.DETACHED)

    # Dropping the consumer together with its inputs.
    runtime.compute(col2(1))
    runtime.drop_builder("col2", drop_inputs=True)
    check_states(JobState.DETACHED, JobState.DETACHED, JobState.DETACHED)
def test_executor_error(env):
    """Check that a failing job raises, is reported, and does not block others.

    ``col1`` computes ``100 // value`` and therefore fails for config ``0``;
    ``col2`` sums several ``col1`` results.
    """
    runtime = env.test_runtime()
    executor = env.executor(runtime, heartbeat_interval=1, n_processes=2)
    executor.start()

    def b1(c):
        dep = col0(c)
        yield
        return 100 // dep.value

    def b2(c):
        deps = [col1(x) for x in c]
        yield
        return sum(dep.value for dep in deps)

    col0 = runtime.register_builder(Builder(lambda c: c, "col0"))
    col1 = runtime.register_builder(Builder(b1, "col1"))
    col2 = runtime.register_builder(Builder(b2, "col2"))

    def check_valid_inputs():
        # 100//10 + 100//20 == 15 and 100//1 + 100//2 + 100//4 == 175
        assert runtime.compute(col2([10, 20])).value == 15
        assert runtime.compute(col2([1, 2, 4])).value == 175

    assert not runtime.get_reports()

    with pytest.raises(JobFailedException, match=".*ZeroDivisionError.*"):
        assert runtime.compute(col2([10, 0, 20]))

    reports = runtime.get_reports()
    assert len(reports) == 2
    failure = reports[0]
    assert failure.report_type == "error"
    assert failure.builder_name == "col1"
    assert failure.config == 0
    assert "ZeroDivisionError" in failure.message

    # The input of the failed job itself was computed successfully.
    assert runtime.get_entry_state(col0(0)) == "finished"
    check_valid_inputs()

    with pytest.raises(JobFailedException, match=".*ZeroDivisionError.*"):
        assert runtime.compute(col1(0))
    assert runtime.get_entry_state(col0(0)) == "finished"
    check_valid_inputs()

    r1 = [col2([100 + x, 101 + x, 102 + x]) for x in range(20)]
    r2 = [col2([200 + x, 201 + x, 202 + x]) for x in range(20)]
    failing = col2([303, 0, 304])
    result = runtime.compute_many(r1 + [failing] + r2, continue_on_error=True)

    # Indices 0..19 and 21..40 are the valid jobs; index 20 is the failing one.
    for i, job in enumerate(r1):
        print(">>>>>>>>>>>>>>>", i, result[i], job)
        assert result[i] is not None
    for i in range(21, 41):
        assert result[i] is not None
    assert result[20] is None

    reports = runtime.get_reports()
    # NOTE(review): the two statements below assign to the report instead of
    # asserting on it; they look like intended assertions -- confirm the
    # expected values before converting them.
    latest = reports[0]
    latest.report_type = "error"
    latest.config = [303, 0, 304]
    print(result)
def test_builder_deps(env):
    """Count how often each builder function runs while dependencies are reused.

    ``counter_file`` holds ``[runs of builder1, runs of builder2]``.
    ``col2(n)`` depends on ``col1(0) .. col1(n - 1)`` and sums their values.
    """
    runtime = env.test_runtime()
    runtime.configure_executor(n_processes=1)
    counter_file = env.file_storage("counter", [0, 0])

    def builder1(config):
        runs = counter_file.read()
        runs[0] += 1
        counter_file.write(runs)
        return config * 10

    def builder2(config):
        deps = [col1(x) for x in range(config)]
        yield
        runs = counter_file.read()
        runs[1] += 1
        counter_file.write(runs)
        return sum(e.value for e in deps)

    col1 = runtime.register_builder(Builder(builder1, "col1"))
    col2 = runtime.register_builder(Builder(builder2, "col2"))

    def compute_and_check(n, expected_runs, expected_value):
        entry = runtime.compute(col2(n))
        assert counter_file.read() == expected_runs
        assert entry.value == expected_value

    compute_and_check(5, [5, 1], 100)
    # col1(0..3) already exist, so only builder2 runs again.
    compute_and_check(4, [5, 2], 60)

    runtime.remove_many([col1(0), col1(3)])

    # builder1 runs three more times here (5 -> 8).
    compute_and_check(6, [8, 3], 150)
    # Second request for the same job: no function runs at all.
    compute_and_check(6, [8, 3], 150)

    runtime.remove(col2(6))

    compute_and_check(5, [8, 4], 100)
    compute_and_check(6, [8, 5], 150)
def test_builder_double_task(env):
    """A builder may request the very same dependency several times."""
    runtime = env.test_runtime()

    def b2(config):
        # Three requests for the identical job col1(10).
        tasks = [col1(10) for _ in range(3)]
        yield
        return sum(task.value for task in tasks)

    col1 = runtime.register_builder(Builder(lambda c: c * 10, "col1"))
    col2 = runtime.register_builder(Builder(b2, "col2"))

    entry = runtime.compute(col2("abc"))
    assert entry.value == 300
def test_builder_ref_in_compute(env):
    """Requesting another builder after ``yield`` must fail.

    The expected error message mentions the "computation phase".
    """
    runtime = env.test_runtime()

    def builder_fn(c):
        yield
        # Dependency requested after the yield.
        col0(123)
        return 123

    col0 = runtime.register_builder(Builder(lambda c: 123, "col0"))
    col1 = runtime.register_builder(Builder(builder_fn, "col1"))

    with pytest.raises(Exception, match="computation phase"):
        runtime.compute(col1(1))
def test_builder_inconsistent_deps(env):
    """A builder whose dependency changes on every call must be rejected.

    The dependency config comes from ``random.random()``; the expected error
    message mentions "dependencies".
    """
    runtime = env.test_runtime()

    def builder_fn():
        c = col0(random.random())
        print("C", c)
        yield
        return c.value + 1

    col0 = runtime.register_builder(Builder(lambda c: 123, "col0"))
    col1 = runtime.register_builder(Builder(builder_fn, "col1"))

    with pytest.raises(Exception, match="dependencies"):
        runtime.compute(col1())
def test_builder_inconsistent_deps(env):
    """A builder whose dependency changes on every call must be rejected.

    The dependency config comes from ``random.random()``; the expected error
    message mentions "dependencies".
    """
    runtime = env.test_runtime()

    def builder_fn(c):
        import random

        # A different dependency on every invocation of the function.
        col0(random.random())
        yield
        col0(123)
        return 123

    col0 = runtime.register_builder(Builder(lambda c: 123, "col0"))
    col1 = runtime.register_builder(Builder(builder_fn, "col1"))

    with pytest.raises(Exception, match="dependencies"):
        runtime.compute(col1(1))
def test_builder_upgrade(env):
    """Check ``upgrade_builder`` for a valid and for a colliding config upgrade.

    A colliding upgrade must raise and leave stored entries readable under
    their old config; a valid upgrade makes entries readable only under the
    new config.
    """
    runtime = env.test_runtime()
    runtime.configure_executor(n_processes=1)

    def creator(config):
        return config * 10

    def adder(config):
        left = col1(config["a"])
        right = col1(config["b"])
        yield
        return left.value + right.value

    def upgrade(config):
        config["c"] = config["a"] + config["b"]
        return config

    def upgrade_conflict(config):
        # Two of the stored configs share "b" == 12, so dropping "a" makes
        # them identical.
        del config["a"]
        return config

    col1 = runtime.register_builder(Builder(creator, "col1"))
    col2 = runtime.register_builder(Builder(adder, "col2"))

    runtime.compute(col1(123))
    configs = [{"a": 10, "b": 12}, {"a": 14, "b": 11}, {"a": 17, "b": 12}]
    runtime.compute_many([col2(c) for c in configs])

    assert runtime.read_entry(col2({"a": 10, "b": 12})).value == 220

    with pytest.raises(Exception, match=".* collision.*"):
        runtime.upgrade_builder(col2, upgrade_conflict)

    # The failed upgrade left the stored entry as it was.
    assert runtime.read_entry(col2({"a": 10, "b": 12})).value == 220

    runtime.upgrade_builder(col2, upgrade)

    assert runtime.try_read_entry(col2({"a": 10, "b": 12})) is None
    assert runtime.read_entry(col2({"a": 10, "b": 12, "c": 22})).value == 220
    assert runtime.try_read_entry(col2({"a": 14, "b": 11})) is None
    assert runtime.read_entry(col2({"a": 14, "b": 11, "c": 25})).value == 250
def test_builder_upgrade(env):
    """Check ``upgrade_builder`` for a valid and for a colliding config upgrade.

    A colliding upgrade must raise and leave stored jobs readable under their
    old arguments; a valid upgrade makes them readable only with the new
    ``c`` argument.
    """
    runtime = env.test_runtime(n_processes=1)

    def creator(a):
        return a * 10

    def adder2(a, b, **kwargs):
        left = col1(a)
        right = col1(b)
        yield
        return left.value + right.value

    def upgrade(config):
        config["c"] = config["a"] + config["b"]
        return config

    def upgrade_conflict(config):
        # Two of the stored configs share "b" == 12, so dropping "a" makes
        # them identical.
        del config["a"]
        return config

    col1 = runtime.register_builder(Builder(creator, "col1"))
    col2 = runtime.register_builder(Builder(adder2, "col2"))

    runtime.compute(col1(123))
    configs = [{"a": 10, "b": 12}, {"a": 14, "b": 11}, {"a": 17, "b": 12}]
    runtime.compute_many([col2(**c) for c in configs])

    assert runtime.read(col2(a=10, b=12)).value == 220

    with pytest.raises(Exception, match=".* collision.*"):
        runtime.upgrade_builder(col2, upgrade_conflict)

    # The failed upgrade left the stored job as it was.
    assert runtime.read(col2(a=10, b=12)).value == 220

    runtime.upgrade_builder(col2, upgrade)

    assert runtime.try_read(col2(a=10, b=12)) is None
    assert runtime.read(col2(a=10, b=12, c=22)).value == 220
    assert runtime.try_read(col2(a=14, b=11)) is None
    assert runtime.read(col2(a=14, b=11, c=25)).value == 250
def test_builder_compute(env):
    """``compute`` and ``compute_many`` return the same entry; the function runs once.

    ``counter`` is incremented on every call of ``adder`` and must still be 1
    after the second request for the same config.
    """
    runtime = env.test_runtime()
    runtime.configure_executor(n_processes=1)
    counter = env.file_storage("counter", 0)

    def adder(config):
        counter.write(counter.read() + 1)
        return config["a"] + config["b"]

    col1 = runtime.register_builder(Builder(adder, "col1"))

    def check_entry(entry):
        assert entry.config["a"] == 10
        assert entry.config["b"] == 30
        assert entry.value == 40
        assert entry.comp_time >= 0
        assert counter.read() == 1

    check_entry(runtime.compute(col1({"a": 10, "b": 30})))

    result = runtime.compute_many([col1({"a": 10, "b": 30})])
    assert len(result) == 1
    check_entry(result[0])
def test_builder_computed(env):
    """Check ``read_entries`` before and after computing, with and without ``drop_missing``."""
    runtime = env.test_runtime()
    runtime.configure_executor(n_processes=1)

    def build_fn(x):
        return x * 10

    col1 = runtime.register_builder(Builder(build_fn, "col1"))
    tasks = [col1(cfg) for cfg in (2, 3, 4, 0, 5)]
    assert len(tasks) == 5

    # Nothing is computed yet: every slot is None, or dropped on request.
    assert runtime.read_entries(tasks) == [None] * len(tasks)
    assert runtime.read_entries(tasks, drop_missing=True) == []

    runtime.compute_many(tasks)
    expected = [20, 30, 40, 0, 50]

    def values_or_missing(entries):
        return [e.value if e else "missing" for e in entries]

    assert [e.value for e in runtime.read_entries(tasks)] == expected

    # col1(123) was never computed.
    with_unknown = tasks + [col1(123)]
    assert values_or_missing(
        runtime.read_entries(with_unknown)) == expected + ["missing"]

    with_unknown = tasks + [col1(123)]
    assert values_or_missing(
        runtime.read_entries(with_unknown, drop_missing=True)) == expected
def test_db_set_value(env):
    """Walk a DB entry through unknown -> announced -> finished and probe invalid writes."""
    runtime = env.test_runtime()
    runtime.configure_executor(heartbeat_interval=1)
    executor = runtime.start_executor()
    col1 = runtime.register_builder(Builder(None, "col1"))
    db = runtime.db

    assert db.get_entry_state(col1.name, make_key("cfg1")) is None

    db.announce_entries(executor.id, [col1("cfg1")], [])
    assert db.get_entry_state(col1.name, make_key("cfg1")) == "announced"
    # An announced entry is visible only when explicitly asked for.
    assert runtime.try_read_entry(col1("cfg1")) is None
    assert runtime.try_read_entry(
        col1("cfg1"), include_announced=True) is not None

    first = make_raw_entry(runtime, col1, "cfg1", "value1")
    db.set_entry_values(executor.id, [first])
    assert db.get_entry_state(col1.name, make_key("cfg1")) == "finished"
    assert runtime.try_read_entry(col1("cfg1")) is not None
    assert runtime.try_read_entry(
        col1("cfg1"), include_announced=True) is not None

    # Setting the value of an already finished entry is rejected.
    with pytest.raises(Exception):
        db.set_entry_values(executor.id, [first])

    # Setting the value of an entry that was never announced is rejected.
    second = make_raw_entry(runtime, col1, "cfg2", "value2")
    with pytest.raises(Exception):
        db.set_entry_values(executor.id, [second])

    db.announce_entries(executor.id, [col1("cfg2")], [])
    db.set_entry_values(executor.id, [second])

    # Creating an entry that already exists is rejected.
    with pytest.raises(Exception):
        db.create_entries([second])

    third = make_raw_entry(runtime, col1, "cfg3", "value3")
    db.create_entries([third])
def test_executor_timeout(env):
    """A job that sleeps longer than its ``timeout`` fails and is reported.

    ``job_setup`` forwards the optional ``"timeout"`` key of the config.
    """
    runtime = env.test_runtime()
    executor = env.executor(runtime, heartbeat_interval=1, n_processes=2)
    executor.start()

    def compute(c):
        duration = c["time"]
        time.sleep(duration)
        return c["time"]

    def job_setup(c):
        return {"timeout": c.get("timeout")}

    col0 = runtime.register_builder(
        Builder(compute, "col0", job_setup=job_setup))

    # Sleeps for 1s but is only allowed 0.2s.
    config0 = {"time": 1, "timeout": 0.2}
    with pytest.raises(JobFailedException, match=".*timeout.*"):
        assert runtime.compute(col0(config0))

    reports = runtime.get_reports()
    assert len(reports) == 2
    timeout_report = reports[0]
    assert timeout_report.report_type == "timeout"
    assert timeout_report.builder_name == "col0"
    assert timeout_report.config == config0
    assert "timeout" in timeout_report.message

    # Without a timeout, or with a generous one, the job completes.
    assert runtime.compute(col0({"time": 1})).value == 1
    assert runtime.compute(col0({"time": 0.2, "timeout": 5})).value == 0.2
def test_frozen_builder(env):
    """A frozen builder serves inserted values and refuses to compute new ones."""

    @builder(is_frozen=True)
    def b0(x):
        pass

    runtime = env.test_runtime()

    def b1(v):
        frozen = b0(x=v)
        yield
        return frozen.value * 10

    col2 = runtime.register_builder(Builder(b1, "col2"))

    runtime.insert(b0(x="a"), 11)
    assert runtime.compute(col2("a")).value == 110
    assert runtime.compute(b0(x="a")).value == 11

    # Nothing was inserted for "b". The dependent job is requested twice.
    for _ in range(2):
        with pytest.raises(Exception, match=".*Frozen builder.*"):
            assert runtime.compute(col2("b"))

    with pytest.raises(Exception, match=".*Frozen builder.*"):
        assert runtime.compute(b0("b"))
def test_builder_compute(env):
    """``compute`` and ``compute_many`` return the same job; the function runs once.

    The job is requested first positionally and then by keyword; ``counter``
    is incremented on every call of ``adder2`` and must stay at 1.
    """
    runtime = env.test_runtime(n_processes=1)
    counter = env.file_storage("counter", 0)

    def adder2(a, b):
        counter.write(counter.read() + 1)
        return a + b

    col1 = runtime.register_builder(Builder(adder2, "col1"))

    def check_job(job):
        assert job.config["a"] == 10
        assert job.config["b"] == 30
        assert job.value == 40
        assert job.metadata().computation_time >= 0
        assert counter.read() == 1

    check_job(runtime.compute(col1(10, 30)))

    result = runtime.compute_many([col1(b=30, a=10)])
    assert len(result) == 1
    check_job(result[0])
def test_builder_clear(env):
    """After ``runtime.clear(col1)`` neither ``col1`` nor dependent ``col2`` entries have a state."""
    runtime = env.test_runtime()

    def b2(c):
        dep = col1(c)
        yield
        return dep.value

    col1 = runtime.register_builder(Builder(lambda c: c, "col1"))
    col2 = runtime.register_builder(Builder(b2, "col2"))

    runtime.compute(col2(1))
    runtime.clear(col1)

    # col2(2) was never computed in this test.
    for col, config in ((col1, 1), (col2, 1), (col2, 2)):
        assert runtime.get_entry_state(col(config)) is None
def test_builder_to_pandas(env):
    """``export_builder`` yields one row per computed job, with an ``arg.c`` column."""
    runtime = env.test_runtime()
    col1 = runtime.register_builder(Builder(lambda c: c * 2, "col1"))

    inputs = [1, 2, 3, 4]
    runtime.compute_many([col1(x) for x in inputs])

    frame = export_builder(runtime, col1.name)
    assert len(frame) == 4
    assert sorted(frame["arg.c"]) == [1, 2, 3, 4]
def test_rest_builders(env):
    """Check the ``rest/builders``, ``rest/jobs/<name>`` and ``rest/blobs/<id>`` endpoints."""
    megabyte = 1024 * 1024

    rt = env.test_runtime()
    hello = rt.register_builder(Builder(None, "hello", is_frozen=True))
    rt.register_builder(Builder(None, "hello2", is_frozen=True))

    with rt.serve(testing=True).test_client() as client:
        # Both builders are listed, with all counters at zero.
        summaries = client.get("rest/builders").get_json()
        assert len(summaries) == 2
        for summary in summaries:
            assert summary["name"] in ("hello", "hello2")
            for counter_key in ("n_finished", "n_failed", "n_in_progress"):
                assert summary[counter_key] == 0

        rt.insert(hello(x=1, y=[1, 2, 3]), "ABC")
        rt.insert(hello(e="e2"), "A" * megabyte)

        summaries = client.get("rest/builders").get_json()
        by_name = {summary["name"]: summary for summary in summaries}
        assert "hello" in by_name and "hello2" in by_name
        hello_summary = by_name["hello"]
        assert hello_summary["n_finished"] == 2
        assert megabyte < hello_summary["size"] < (megabyte + 2000)

        response = client.get("rest/jobs/hello")
        jobs = response.get_json()
        jobs.sort(key=lambda job: job["id"])
        job_ids = []
        for job in jobs:
            # "key", "size" and "id" must be present; they are stripped
            # from the record after being checked.
            assert job.get("key")
            del job["key"]
            assert job.get("size")
            del job["size"]
            job_ids.append(job.pop("id"))
        assert len(jobs) == 2
        assert jobs[1]["config"] == {"e": "e2"}
        assert jobs[0]["config"] == {"x": 1, "y": [1, 2, 3]}

        # Blob listing of the second job (the one holding the large string).
        blobs = client.get("rest/blobs/" + str(job_ids[1])).get_json()
        assert len(blobs) == 1
        blob = blobs[0]
        assert 50 < len(blob["repr"]) <= 85
        assert blob["size"] > 1000
        assert blob["mime"] == consts.MIME_PICKLE
def test_rest_reports(env):
    """``rest/reports`` returns a single ``info`` report without a builder after two computations."""
    rt = env.test_runtime()
    col1 = rt.register_builder(Builder(lambda c: c * 10, "col1"))
    rt.compute_many([col1(cfg) for cfg in (20, 30)])

    with rt.serve(testing=True).test_client() as client:
        reports = client.get("rest/reports").get_json()
        assert len(reports) == 1
        only_report = reports[0]
        assert only_report["type"] == "info"
        assert only_report["builder"] is None
def test_builder_double_yield_error(env):
    """A builder function that yields twice must fail with a "yielded" error."""
    runtime = env.test_runtime()

    def builder_fn(c):
        yield
        yield
        return 123

    col1 = runtime.register_builder(Builder(builder_fn, "col1"))

    with pytest.raises(Exception, match="yielded"):
        runtime.compute(col1(1))
def test_reports(env):
    """Exactly one report exists after a single successful computation."""
    runtime = env.test_runtime()

    def adder(config):
        return config["a"] + config["b"]

    col1 = runtime.register_builder(Builder(adder, "col1"))
    runtime.compute(col1({"a": 10, "b": 30}))

    assert len(runtime.get_reports()) == 1
def test_builder_to_pandas(env):
    """``export_builder_to_pandas`` yields one row per entry with ``config`` and ``value`` columns."""
    runtime = env.test_runtime()
    col1 = runtime.register_builder(Builder(lambda c: c * 2, "col1"))

    inputs = [1, 2, 3, 4]
    runtime.compute_many([col1(x) for x in inputs])

    frame = export_builder_to_pandas(runtime, col1.name)
    assert len(frame) == 4
    assert sorted(frame["config"]) == [1, 2, 3, 4]
    assert sorted(frame["value"]) == [2, 4, 6, 8]

    # The row for config 1 carries the value 2.
    rows_for_one = frame[frame["config"] == 1]
    assert rows_for_one["value"].iloc[0] == 2
def test_pickle_builder():
    """A ``Builder`` survives a pickle round trip and can still run its function."""

    def fn(x):
        return x + 1

    original = Builder(fn)
    print(original)

    restored = pickle.loads(pickle.dumps(original))

    assert restored.run_with_config({"x": 42}) == 43
    assert restored.run_with_args((44, ), {}) == 45
def test_builder_error_in_deps(env):
    """An exception raised before the builder's ``yield`` surfaces from ``compute``."""
    runtime = env.test_runtime()

    def builder_fn(c):
        if c != 0:
            raise Exception("MyError")
        yield
        return 123

    col1 = runtime.register_builder(Builder(builder_fn, "col1"))

    # Config 1 is non-zero, so the function raises before yielding.
    with pytest.raises(Exception, match="MyError"):
        runtime.compute(col1(1))
def test_runner_selection(env):
    """A builder with ``job_setup="tr"`` is run by the runner registered as ``"tr"``.

    ``col1`` names the runner; ``col2`` depends on ``col1`` and has no job
    setup of its own.
    """
    runtime = env.test_runtime()
    testing_runner = NaiveRunner()
    runtime.add_runner("tr", testing_runner)

    col1 = runtime.register_builder(
        Builder(lambda c: c, "col1", job_setup="tr"))

    def builder_fn(c):
        col1(c)
        yield
        return c

    col2 = runtime.register_builder(Builder(builder_fn, "col2"))

    outer = runtime.compute(col2(10))
    assert outer.value == 10
    assert outer.job_setup == {}

    inner = runtime.read_entry(col1(10))
    assert inner.job_setup == {"runner": "tr"}

    # The testing runner recorded exactly one event.
    assert len(testing_runner.events) == 1
def test_fixed_builder(env):
    """A builder without a function serves inserted values and cannot compute new ones."""
    runtime = env.test_runtime()
    fix1 = runtime.register_builder(Builder(None, "fix1"))

    def b1(config):
        fixed = fix1(config)
        yield
        return fixed.value * 10

    col2 = runtime.register_builder(Builder(b1, "col2"))

    runtime.insert(fix1("a"), 11)
    assert runtime.compute(col2("a")).value == 110
    assert runtime.compute(fix1("a")).value == 11

    # Nothing was inserted for "b". The dependent job is requested twice.
    for _ in range(2):
        with pytest.raises(Exception, match=".* fixed builder.*"):
            assert runtime.compute(col2("b"))