def test_tune_simple():
    """Tune plain and @tunable objectives over a small grid space.

    Verifies that a bare function and an ExecutionEngine-aware tunable both
    run under every legal ``distributable`` setting, and that requesting
    distributed execution for an engine-bound tunable is a compile error.
    """

    def t1(a: int, b: int) -> float:
        return a + b

    # A plain function can be tuned in any distribution mode.
    for mode in [True, False, None]:
        with FugueWorkflow() as dag:
            space_df = space_to_df(dag, Space(a=Grid(0, 1), b=Grid(2, 3)))
            tune(space_df, t1, distributable=mode).show()

    @tunable()
    def t2(e: ExecutionEngine, a: int, b: int) -> float:
        assert isinstance(e, ExecutionEngine)
        return a + b

    # An engine-dependent tunable only supports non-distributed modes.
    for mode in [False, None]:
        with FugueWorkflow() as dag:
            space_df = space_to_df(dag, Space(a=Grid(0, 1), b=Grid(2, 3)))
            tune(space_df, t2, distributable=mode).show()

    # equivalent syntax sugar
    with FugueWorkflow() as dag:
        t2.space(a=Grid(0, 1), b=Grid(2, 3)).tune(dag).show()

    # Forcing distributable=True on an engine-bound tunable must fail at
    # workflow compile time, not at run time.
    with raises(FugueTuneCompileError):
        with FugueWorkflow() as dag:
            space_df = space_to_df(dag, Space(a=Grid(0, 1), b=Grid(2, 3)))
            tune(space_df, t2, distributable=True).show()
def test_space_to_df():
    """space_to_df serializes a Space into a one-column JSON dataframe.

    Checks the default one-config-per-row layout and the batched,
    unshuffled layout produced by ``batch_size``/``shuffle``.
    """
    # Default: each grid point becomes its own single-element JSON batch.
    single_rows = [
        ['[{"a": 0, "b": 2}]'],
        ['[{"a": 0, "b": 3}]'],
        ['[{"a": 1, "b": 2}]'],
        ['[{"a": 1, "b": 3}]'],
    ]
    with FugueWorkflow() as dag:
        result = space_to_df(dag, Space(a=Grid(0, 1), b=Grid(2, 3)))
        result.assert_eq(dag.df(single_rows, "__fmin_params__:str"))

    # batch_size=3 groups configs into JSON arrays of up to 3; shuffle=False
    # keeps the deterministic grid order so the expected rows are stable.
    batched_rows = [
        ['[{"a": 0, "b": 2}, {"a": 0, "b": 3}, {"a": 1, "b": 2}]'],
        ['[{"a": 1, "b": 3}]'],
    ]
    with FugueWorkflow() as dag:
        result = space_to_df(
            dag, Space(a=Grid(0, 1), b=Grid(2, 3)), batch_size=3, shuffle=False
        )
        result.assert_eq(dag.df(batched_rows, "__fmin_params__:str"))
def test_wf():
    """Run a tunable through workflows mixing grid and stochastic params.

    Uses HyperoptRunner both when the space contains continuous/stochastic
    expressions (Rand, Choice, RandInt) and when it is purely grid/constant.
    """

    @tunable()
    def func(a: float, b: float, c: int, d: int) -> float:
        return a * a + b * b + c + d

    # Mixed space: grid dimension crossed with stochastic dimensions,
    # optimized by hyperopt (fixed seed keeps the run deterministic).
    with FugueWorkflow() as dag:
        space = space_to_df(
            dag,
            Space(a=Grid(1, 2), b=Rand(-100, 100), c=Choice(1, -1), d=RandInt(0, 3)),
        )
        tune(space, func, objective_runner=HyperoptRunner(100, seed=3)).show()

    # Purely deterministic space: grids and constants only.
    with FugueWorkflow() as dag:
        space = space_to_df(dag, Space(a=Grid(1, 2), b=Grid(0, 1), c=1, d=2))
        tune(space, func, objective_runner=HyperoptRunner(100, seed=3)).show()
def test_encode_decode():
    """Encoding a Space and decoding each item must round-trip exactly.

    The space covers every expression type (Grid, Rand, Choice, RandInt)
    including nested list/dict containers and non-default flags.
    """
    space = Space(
        a=Grid(1, 2),
        b=Rand(0, 1.0, 0.2, log=True, normal=False),
        c=Choice(1, 2, 3),
        d=[Grid(1, 2), Rand(0, 2.0)],
        e={"x": "xx", "y": Choice("a", "b")},
        f=RandInt(0, 10, log=False, normal=True),
    )
    # decode(encode(x)) must reproduce the original configurations.
    round_tripped = [decode(item) for item in space.encode()]
    assert list(space) == round_tripped
    # The encoded form must also be JSON-serializable (printed for debugging).
    for item in space.encode():
        print(json.dumps(item, indent=2))
def test_visualize_top_n(tmpdir):
    """visualize_top_n renders the best trials for simple and df-based tunes.

    Covers a plain scalar objective and a dataframe-consuming tunable that
    returns an error/metadata dict, using a temp path for serialization.
    """

    def t1(a: int, b: int) -> float:
        return a + b

    # Simple case: visualize the 2 best results of a grid tune.
    with FugueWorkflow() as dag:
        space_df = space_to_df(dag, Space(a=Grid(0, 1), b=Grid(2, 3)))
        visualize_top_n(tune(space_df, t1, distributable=False), top=2)

    @tunable()
    def t2(df1: pd.DataFrame, df2: pd.DataFrame, a: int, b: int) -> Dict[str, Any]:
        error = float(a + b + df1["y"].sum() + df2["y"].sum())
        return {"error": error, "metadata": {"a": a}}

    # Dataframe case: partitioned inputs need a temp path for serialization.
    engine = NativeExecutionEngine(conf={FUGUE_TUNE_TEMP_PATH: str(tmpdir)})
    with FugueWorkflow(engine) as dag:
        df1 = dag.df([[0, 1], [1, 2], [0, 2]], "x:int,y:int").partition(by=["x"])
        df2 = dag.df([[0, 10], [1, 20]], "x:int,y:int").partition(by=["x"])
        result = t2.space(df1=df1, df2=df2, a=Grid(0, 1), b=Grid(2, 3)).tune()
        visualize_top_n(result, top=2)
def test_tunable_with_space():
    """Tunable.space accepts Space objects, keyword params, or a mix.

    Each invocation must encode to the same cross-product of configs.
    """
    t = _MockTunable()
    s1 = Space(a=Grid(0, 1))
    s2 = Space(b=Grid(3, 4))

    # Full cross product of two 2-point grids, in deterministic order.
    cross = [
        {"a": 0, "b": 3},
        {"a": 0, "b": 4},
        {"a": 1, "b": 3},
        {"a": 1, "b": 4},
    ]

    # Two Space positional args are multiplied together.
    assert cross == list(t.space(s1, s2).space.encode())

    # Keyword args: a constant value is attached to every grid point.
    expected_kw = [{"a": 0, "b": 10}, {"a": 1, "b": 10}]
    assert expected_kw == list(t.space(a=Grid(0, 1), b=10).space.encode())

    # Mixing a Space positional arg with a keyword expression is equivalent.
    assert cross == list(t.space(s1, b=Grid(3, 4)).space.encode())
def test_operators():
    """Space supports * (cross product), + (union), and sum().

    Both operators also accept a plain list of dicts as the right operand,
    and sum() works with or without an explicit start value.
    """
    s1 = Space(a=1, b=Grid(2, 3))
    s2 = Space(c=Grid("a", "b"))

    # Space * Space: cross product, left space varying slowest... actually
    # the right operand's grid is the inner loop here.
    expected_mul = [
        dict(a=1, b=2, c="a"),
        dict(a=1, b=2, c="b"),
        dict(a=1, b=3, c="a"),
        dict(a=1, b=3, c="b"),
    ]
    assert expected_mul == list(s1 * s2)

    # Space + Space: concatenation of the two expansions.
    expected_add = [
        dict(a=1, b=2),
        dict(a=1, b=3),
        dict(c="a"),
        dict(c="b"),
    ]
    assert expected_add == list(s1 + s2)

    # Space * list-of-dicts: note the ordering differs from Space * Space —
    # here the left space is the inner loop.
    assert [
        dict(a=1, b=2, c="a"),
        dict(a=1, b=3, c="a"),
        dict(a=1, b=2, c="b"),
        dict(a=1, b=3, c="b"),
    ] == list(s1 * [dict(c="a"), dict(c="b")])

    # Space + list-of-dicts behaves like Space + Space.
    assert expected_add == list(s1 + [dict(c="a"), dict(c="b")])

    # sum() chains + across many spaces; an explicit None start also works.
    s1 = Space(a=1, b=Grid(2, 3))
    s2 = Space(c=Grid("a", "b"))
    s3 = Space(d=5)
    expected_sum = [
        dict(a=1, b=2),
        dict(a=1, b=3),
        dict(c="a"),
        dict(c="b"),
        dict(d=5),
    ]
    assert expected_sum == list(sum([s1, s2, s3])) == list(sum([s1, s2, s3], None))
def test_single_space():
    """A single Space expands Grid expressions into concrete config dicts.

    Covers a top-level grid, grids inside a list value, and grids inside a
    dict value; each container position is expanded as part of the cross
    product, in deterministic order.
    """
    # Top-level 3-point grid.
    dicts = list(Space(a=1, b=Grid(2, 3, 4)))
    assert 3 == len(dicts)
    assert dict(a=1, b=2) == dicts[0]
    assert dict(a=1, b=3) == dicts[1]
    # Fix: the third configuration was built but never verified.
    assert dict(a=1, b=4) == dicts[2]

    # Grid values may be None or strings.
    dicts = list(Space(a=Grid(None, "x"), b=Grid(2, 3)))
    assert 4 == len(dicts)

    # Grids nested in a list value cross-multiply element-wise.
    dicts = list(Space(a=1, b=[Grid(2, 3), Grid(4, 5)]))
    assert 4 == len(dicts)
    assert dict(a=1, b=[2, 4]) == dicts[0]
    assert dict(a=1, b=[2, 5]) == dicts[1]
    assert dict(a=1, b=[3, 4]) == dicts[2]
    assert dict(a=1, b=[3, 5]) == dicts[3]

    # Grids nested in a dict value cross-multiply per key.
    dicts = list(Space(a=1, b=dict(x=Grid(2, 3), y=Grid(4, 5))))
    assert 4 == len(dicts)
    assert dict(a=1, b=dict(x=2, y=4)) == dicts[0]
    assert dict(a=1, b=dict(x=2, y=5)) == dicts[1]
    assert dict(a=1, b=dict(x=3, y=4)) == dicts[2]
    assert dict(a=1, b=dict(x=3, y=5)) == dicts[3]
def test_tune_df(tmpdir):
    """Tune objectives that consume dataframes, via explicit and sugar APIs.

    Exercises manual serialize_df + cross_join wiring, the ``.space(df=...)``
    sugar for a single dataframe, and the sugar for multiple partitioned
    dataframes returning an error/metadata dict.
    """

    @tunable()
    def t1(a: int, df: pd.DataFrame, b: int) -> float:
        return float(a + b + df["y"].sum())

    engine = make_execution_engine(None, {FUGUE_TUNE_TEMP_PATH: str(tmpdir)})

    # Explicit wiring: serialize the partitioned data and cross-join it with
    # the broadcast parameter space.
    for mode in [True, False, None]:
        with FugueWorkflow(engine) as dag:
            space = space_to_df(dag, Space(a=Grid(0, 1), b=Grid(2, 3)), batch_size=3)
            data = dag.df([[0, 1], [1, 2], [0, 2]], "x:int,y:int").partition(by=["x"])
            joined = serialize_df(data, "df", str(tmpdir)).cross_join(space.broadcast())
            tune(joined, t1, distributable=mode).show()

    # Syntax sugar: pass the dataframe directly as a space parameter.
    for mode in [True, False, None]:
        with FugueWorkflow(engine) as dag:
            data = dag.df([[0, 1], [1, 2], [0, 2]], "x:int,y:int")
            t1.space(a=Grid(0, 1), b=Grid(2, 3), df=data).tune().show()

    @tunable()
    def t2(df1: pd.DataFrame, df2: pd.DataFrame, a: int, b: int) -> Dict[str, Any]:
        error = float(a + b + df1["y"].sum() + df2["y"].sum())
        return {"error": error, "metadata": {"a": a}}

    # Multiple partitioned dataframes via the sugar API.
    with FugueWorkflow(engine) as dag:
        df1 = dag.df([[0, 1], [1, 2], [0, 2]], "x:int,y:int").partition(by=["x"])
        df2 = dag.df([[0, 10], [0, 20]], "x:int,y:int").partition(by=["x"])
        t2.space(df1=df1, df2=df2, a=Grid(0, 1), b=Grid(2, 3)).tune().show()