def test_template_sample():
    """sample(): argument validation, determinism per seed, expression resolution."""
    template = make_template(dict())
    # sample size must be positive
    raises(ValueError, lambda: list(template.sample(0, 0)))
    raises(ValueError, lambda: list(template.sample(-1, 0)))
    # an empty template yields exactly one (empty) sample regardless of n
    assert [dict()] == list(template.sample(100, 0))
    # a template without tunable expressions yields itself once
    template = make_template(dict(a=1, b=2))
    assert [dict(a=1, b=2)] == list(template.sample(100, 0))
    # stochastic expressions: same seed -> same samples, new seed -> new samples
    template = make_template(dict(a=1, b=Rand(0, 1)))
    assert list(template.sample(10, 0)) == list(template.sample(10, 0))
    assert list(template.sample(10, 0)) != list(template.sample(10, 1))
    samples = list(template.sample(10, 0))
    assert len(samples) == 10
    assert all(0 <= s.template["b"] <= 1 for s in samples)
    assert all(s.empty for s in samples)
    assert all(not s.has_grid for s in samples)
    assert all(not s.has_stochastic for s in samples)
    # a reused Rand instance resolves to the same value everywhere it appears;
    # Grid expressions are left unresolved by sample()
    shared = Rand(0, 1)
    template = make_template(dict(a=1, b=shared, c=Grid(0, 1), d=[shared]))
    samples = list(template.sample(10, 0))
    assert len(samples) == 10
    assert all(0 <= s.template["b"] <= 1 for s in samples)
    assert all(s.template["d"][0] == s.template["b"] for s in samples)
    assert all(not s.empty for s in samples)
    assert all(s.has_grid for s in samples)
    assert all(not s.has_stochastic for s in samples)
def test_normal_rand():
    """NormalRand: validation, per-seed determinism, distribution shape, uuid."""
    # sigma must be strictly positive
    with raises(ValueError):
        NormalRand(1.0, 0.0)
    with raises(ValueError):
        NormalRand(1.0, -1.0)

    dist = NormalRand(0.05, 0.2)
    assert dist.generate(0) == dist.generate(0)  # same seed -> same value
    assert dist.generate(0) != dist.generate(1)  # new seed -> new value
    samples = dist.generate_many(100000, 0)
    # KS test against N(0.05, 0.2)
    ks = stats.kstest(samples, "norm", args=(0.05, 0.2))
    assert ks.pvalue > 0.4

    dist = NormalRand(0.05, 0.2, q=0.1)
    assert dist.generate(0) == dist.generate(0)
    assert dist.generate(0) != dist.generate(1)
    # with q=0.1 the values in range are quantized to a small set
    actual = [x for x in dist.generate_many(1000, 0) if -0.155 <= x <= 0.255]
    assert_close([-0.15, -0.05, 0.05, 0.15, 0.25], actual)

    same = NormalRand(0.05, 0.2, q=0.1)
    other_type = Rand(0.05, 0.2, q=0.1)
    assert to_uuid(dist) == to_uuid(same)
    assert to_uuid(dist) != to_uuid(other_type)
def test_randint():
    """RandInt: log-domain validation, range coverage, q/include_high, uuid."""
    with raises(ValueError):
        RandInt(0, 10, log=True)  # for log, low>=1.0
    dist = RandInt(10, 20, log=False)
    assert dist.generate(0) == dist.generate(0)  # deterministic per seed
    assert dist.generate(0) != dist.generate(1)
    assert set(range(10, 21)) == set(dist.generate_many(100, 0))

    dist = RandInt(10, 20, include_high=False)
    assert set(range(10, 20)) == set(dist.generate_many(100, 0))

    # quantization steps from low; include_high controls the last value
    dist = RandInt(10, 20, q=5, include_high=False)
    assert {10, 15} == set(dist.generate_many(100, 0))

    dist = RandInt(10, 20, q=5, include_high=True)
    assert {10, 15, 20} == set(dist.generate_many(100, 0))

    dist = RandInt(3, 20, log=True)
    assert dist.generate(0) == dist.generate(0)
    assert dist.generate(0) != dist.generate(1)
    assert set(range(3, 21)) == set(dist.generate_many(1000, 0))

    # uuid distinguishes expression types but not construction order
    d1 = RandInt(1, 20, q=2)
    d2 = RandInt(1, 20, q=2)
    d3 = Rand(1, 20, q=2)
    assert to_uuid(d1) == to_uuid(d2)
    assert to_uuid(d1) != to_uuid(d3)
def test_trial_serialization():
    """A Trial survives a cloudpickle round trip with all fields intact."""
    params = {"a": 1, "b": Rand(1, 2)}
    original = Trial("abc", params, {}, keys=["x", "y"], dfs={"v": ""})
    restored = cloudpickle.loads(cloudpickle.dumps(original))
    # params are normalized into a template on the restored object
    assert isinstance(restored.params, TuningParametersTemplate)
    assert restored.keys == ["x", "y"]
    assert restored.trial_id == original.trial_id
    assert restored.dfs == {"v": ""}
def test_template_product():
    """product_grid expands Grid expressions into their cross product."""
    # no parameters at all -> a single empty dict
    template = make_template(dict())
    assert [dict()] == list(template.product_grid())
    # no tunable expressions -> the literal dict, unchanged
    template = make_template(dict(a=1, b=2))
    assert [dict(a=1, b=2)] == list(template.product_grid())
    # one Grid -> one combination per grid value
    template = make_template(dict(a=1, b=Grid(0, 1)))
    assert [dict(a=1, b=0), dict(a=1, b=1)] == list(template.product_grid())
    # a reused Grid instance takes the same value everywhere it appears
    shared = Grid(0, 1)
    template = make_template(dict(a=shared, b=1, c=[shared], d=Grid(0, 1)))
    expected = [
        dict(a=0, b=1, c=[0], d=0),
        dict(a=0, b=1, c=[0], d=1),
        dict(a=1, b=1, c=[1], d=0),
        dict(a=1, b=1, c=[1], d=1),
    ]
    assert expected == list(template.product_grid())
    # stochastic expressions are left untouched in each combination
    template = make_template(dict(a=1, b=Grid(0, 1), c=Rand(0, 1)))
    assert [dict(a=1, b=0, c=Rand(0, 1)), dict(a=1, b=1, c=Rand(0, 1))] == list(
        template.product_grid()
    )
def test_trial_decision():
    """TrialDecision: construction, serialization round trip, copy semantics.

    Fix: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24; it was
    a plain alias for the builtin ``float``, so ``float(0.1)`` is the exact
    behavioral equivalent and keeps the test running on modern NumPy.
    """
    trial = Trial("abc", {"a": 1}, {"b": Rand(0, 2)})
    report = TrialReport(
        trial, metric=float(0.1), params={"c": Rand(0, 3)}, metadata={"d": 4}
    )
    decision = TrialDecision(
        report, budget=0.0, should_checkpoint=True, metadata={"x": 1}, reason="p"
    )
    assert trial is decision.trial
    assert report is decision.report
    decision = cloudpickle.loads(cloudpickle.dumps(decision))
    # budget 0.0 implies the trial should stop
    assert decision.should_stop
    assert decision.should_checkpoint
    assert {"x": 1} == decision.metadata
    assert "p" == decision.reason
    assert 0.0 == decision.budget
    # copying returns the same object (decisions behave as immutable)
    assert copy.copy(decision) is decision
    assert copy.deepcopy(decision) is decision
    d2 = cloudpickle.loads(cloudpickle.dumps(decision))
    assert d2.trial_id == trial.trial_id
    # assert Rand(0, 3) == d2.report.params["c"]
    assert decision.should_stop
    assert decision.should_checkpoint
    assert {"x": 1} == decision.metadata
    assert "p" == decision.reason
    # a positive budget means the trial may continue
    decision = TrialDecision(
        report, budget=1.0, should_checkpoint=True, metadata={"x": 1}
    )
    assert 1.0 == decision.budget
    assert not decision.should_stop
    print(decision)
def test_repr():
    """repr/str of tuning expressions and templates are stable and explicit."""
    assert repr(Grid("a", "b")) == "Grid('a', 'b')"
    assert repr(Choice("a", "b")) == "Choice('a', 'b')"
    assert repr(TransitionChoice("a", "b")) == "TransitionChoice('a', 'b')"
    # positional args are rendered with their keyword names
    assert repr(Rand(0.2, 1, 0.1, True, False)) == (
        "Rand(low=0.2, high=1, q=0.1, log=True, include_high=False)"
    )
    assert repr(RandInt(2, 10, 2, True, False)) == (
        "RandInt(low=2, high=10, q=2, log=True, include_high=False)"
    )
    assert repr(NormalRand(0.1, 0.2, 0.3)) == "NormalRand(mu=0.1, sigma=0.2, q=0.3)"
    assert repr(NormalRandInt(2, 0.2, 3)) == "NormalRandInt(mu=2, sigma=0.2, q=3)"
    # FuncParam shows the function name (or <lambda>) plus its arguments
    assert repr(FuncParam(make_template, a=Grid("a", "b"))) == (
        "FuncParam(make_template, a=Grid('a', 'b'))"
    )
    assert repr(FuncParam(lambda x: x + 1, Grid("a", "b"))) == (
        "FuncParam(<lambda>, Grid('a', 'b'))"
    )
    # templates render like the underlying dict, for both repr and str
    template = TuningParametersTemplate(dict(a=1, b=Grid(1, 2)))
    assert repr(template) == "{'a': 1, 'b': Grid(1, 2)}"
    assert str(template) == "{'a': 1, 'b': Grid(1, 2)}"
def test_builder(tmpdir):
    # Builds TuneDatasets from a Space, optionally joined with dataframes,
    # and verifies row counts / schemas of the materialized data.
    space = Space(a=1, b=2, c=Grid(2, 3))
    builder = TuneDatasetBuilder(space, str(tmpdir))

    # helper: assert the produced dataframe has n rows (and optionally a schema)
    def assert_count(df: DataFrame, n: int, schema=None) -> None:
        assert len(df.as_array()) == n
        if schema is not None:
            assert df.schema == schema

    # test to_space
    with FugueWorkflow() as dag:
        df = builder.build(dag).data
        df.show()

    df1 = ArrayDataFrame([[0, 1], [1, 1], [0, 2]], "a:int,b:int")
    # test single df
    with FugueWorkflow() as dag:
        builder.add_dfs(WorkflowDataFrames(x=dag.df(df1)))
        dataset = builder.build(dag)
        assert ["x"] == dataset.dfs
        assert [] == dataset.keys
        df = dataset.data
        df.show()
        df.output(
            assert_count,
            params=dict(n=2, schema=f"__tune_df__x:str,{TUNE_DATASET_TRIALS}:str"),
        )

    space = Space(b=Rand(0, 1), a=1, c=Grid(2, 3), d=Grid("a", "b"))
    df2 = ArrayDataFrame([[0, 1], [1, 1], [3, 2]], "a:int,bb:int")
    df3 = ArrayDataFrame([[10, 1], [11, 1], [10, 2]], "a:int,c:int")
    builder = TuneDatasetBuilder(space)
    # temp path supplied via engine config instead of the builder argument
    engine = NativeExecutionEngine(conf={TUNE_TEMP_PATH: str(tmpdir)})
    # test multiple dfs, batch_size and config
    with FugueWorkflow(engine) as dag:
        dfs = WorkflowDataFrames(
            a=dag.df(df1).partition_by("a"), b=dag.df(df2).partition_by("a")
        )
        dataset = (
            builder.add_dfs(dfs, "inner").add_df("c", dag.df(df3), "cross").build(dag)
        )
        assert ["a"] == dataset.keys
        assert ["a", "b", "c"] == dataset.dfs
        df = dataset.data
        df.show()
        df.output(
            assert_count,
            params=dict(
                n=8,
                schema="a:int,__tune_df__a:str,__tune_df__b:str,"
                f"__tune_df__c:str,{TUNE_DATASET_TRIALS}:str",
            ),
        )
        # NOTE(review): batch_size=3 presumably packs trials per row — row
        # count drops from 8 to 4 here; verify against TuneDatasetBuilder docs
        df = builder.build(dag, batch_size=3).data
        df.show()
        df.output(
            assert_count,
            params=dict(
                n=4,
                schema="a:int,__tune_df__a:str,__tune_df__b:str,"
                f"__tune_df__c:str,{TUNE_DATASET_TRIALS}:str",
            ),
        )
def test_trial_report():
    """TrialReport: accessors, serialization round trips, derived copies.

    Fix: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24; it was
    a plain alias for the builtin ``float``, so ``float(...)`` is the exact
    behavioral equivalent and keeps the test running on modern NumPy.
    """
    trial = Trial("abc", {"a": Rand(3, 4)}, {"b": 2})
    report = copy.copy(
        TrialReport(
            trial,
            metric=float(0.1),
            params={"c": Rand(1, 2)},
            metadata={"d": 4},
            cost=2.0,
        )
    )
    # shallow copy preserves the trial reference
    assert trial is report.trial
    report = cloudpickle.loads(cloudpickle.dumps(report))
    assert 0.1 == report.metric
    assert type(report.metric) == float
    assert {"c": Rand(1, 2)} == report.params
    assert {"d": 4} == report.metadata
    assert 2.0 == report.cost
    assert 0 == report.rung
    # sort_metric defaults to the metric itself
    assert 0.1 == report.sort_metric
    report = copy.deepcopy(
        TrialReport(trial, metric=float(0.111), cost=2.0, rung=4, sort_metric=1.23)
    )
    assert trial is report.trial
    report = cloudpickle.loads(cloudpickle.dumps(report))
    assert 0.111 == report.metric
    assert type(report.metric) == float
    # params default to the trial's params
    assert {"a": Rand(3, 4)} == report.params
    assert {} == report.metadata
    assert 2.0 == report.cost
    assert 4 == report.rung
    # generate_sort_metric / with_sort_metric return new reports,
    # leaving the original's sort_metric untouched
    r1 = report.generate_sort_metric(True, 2)
    r2 = report.generate_sort_metric(False, 1)
    r3 = report.with_sort_metric(0.234)
    assert 1.23 == report.sort_metric
    assert 0.11 == r1.sort_metric
    assert -0.1 == r2.sort_metric
    assert 0.234 == r3.sort_metric
    # log_time is preserved across serialization; reset_log_time returns a
    # new report with a fresh timestamp
    ts = report.log_time
    sleep(0.1)
    report = cloudpickle.loads(cloudpickle.dumps(report))
    nr = report.reset_log_time()
    assert nr.log_time > report.log_time
    assert report.log_time == ts
    assert trial.trial_id == report.trial_id
    assert 0.111 == report.metric
    assert type(report.metric) == float
    assert {"a": Rand(3, 4)} == report.params
    assert {} == report.metadata
    assert 2.0 == report.cost
    assert 3.0 == report.with_cost(3.0).cost
    assert 5 == report.with_rung(5).rung
    # dataframes attached to the trial are dropped inside a report
    td = trial.with_dfs({"a": pd.DataFrame})
    report = TrialReport(td, metric=float(0.1))
    assert 0 == len(report.trial.dfs)
def test_rand():
    """Rand: argument validation, degenerate ranges, quantization, log sampling."""
    with raises(ValueError):
        Rand(1.0, 0.9)  # high < low
    with raises(ValueError):
        Rand(1.0, 10, q=-0.1)  # negative step
    with raises(ValueError):
        Rand(1.0, 1.0, include_high=False)  # empty open interval
    with raises(ValueError):
        Rand(0.0, 1.0, log=True)  # for log, low>=1.0

    # degenerate ranges always yield the single possible value
    dist = Rand(0.1, 0.1, q=0.1, log=False)
    assert dist.generate() == 0.1
    assert dist.generate(10) == 0.1
    dist = Rand(1.0, 1.0, q=0.1, log=True)
    assert dist.generate() == 1.0
    assert dist.generate(10) == 1.0

    dist = Rand(1.0, 2.0, q=0.1, log=False)
    assert dist.generate(0) == dist.generate(0)  # deterministic per seed
    assert dist.generate(0) != dist.generate(1)
    assert_close([x / 10 for x in range(10, 21)], dist.generate_many(100, 0))

    # a high value slightly past a whole step still rounds to the same grid
    dist = Rand(1.0, 2.09999, q=0.1, log=False)
    assert dist.generate(0) == dist.generate(0)
    assert dist.generate(0) != dist.generate(1)
    assert_close([x / 10 for x in range(10, 21)], dist.generate_many(100, 0))

    # include_high=False drops the top value only on exact boundaries
    dist = Rand(1.0, 2.0, q=0.1, log=False, include_high=False)
    assert dist.generate(0) == dist.generate(0)
    assert dist.generate(0) != dist.generate(1)
    assert_close([x / 10 for x in range(10, 20)], dist.generate_many(100, 0))

    dist = Rand(1.0, 2.09999, q=0.1, log=False, include_high=False)
    assert dist.generate(0) == dist.generate(0)
    assert dist.generate(0) != dist.generate(1)
    assert_close([x / 10 for x in range(10, 21)], dist.generate_many(100, 0))

    # log sampling: log of samples should be uniform over [log(low), log(high)]
    dist = Rand(0.1, 2.0, log=True, include_high=False)
    assert dist.generate(0) == dist.generate(0)
    assert dist.generate(0) != dist.generate(1)
    samples = dist.generate_many(10000, 0)
    for x in samples:
        assert 0.1 <= x <= 2.0
    ks = stats.kstest(
        np.log(samples), "uniform", args=(np.log(0.1), np.log(2) - np.log(0.1))
    )
    assert ks.pvalue > 0.4

    # uuid ignores keyword order but not missing keywords
    d1 = Rand(1.0, 2.0, q=0.1, log=False)
    d2 = Rand(1.0, 2.0, log=False, q=0.1)
    d3 = Rand(1.0, 2.0, log=False)
    assert to_uuid(d1) == to_uuid(d2)
    assert to_uuid(d1) != to_uuid(d3)
def test_tuning_parameters_template():
    """Template flags (grid/stochastic), extraction, filling, and FuncParam."""
    # plain values only
    template = make_template(dict(a=1))
    assert template.empty
    assert not template.has_grid
    assert not template.has_stochastic
    # a single stochastic expression
    template = make_template(dict(a=Rand(0, 1)))
    assert not template.empty
    assert not template.has_grid
    assert template.has_stochastic
    # a single grid expression
    template = make_template(dict(a=Grid(0, 1)))
    assert not template.empty
    assert template.has_grid
    assert not template.has_stochastic
    # nested dicts and lists are searched recursively; None is preserved
    nested = dict(
        a=Rand(0, 1),
        b=Grid(2, 3),
        c=dict(
            a=Rand(10, 20),
            b=[dict(x=Rand(100, 200))],
            c=[1, Rand(1000, 2000)],
            d=None,
        ),
        d=None,
    )
    template = make_template(nested)
    assert not template.empty
    assert template.has_grid
    assert template.has_stochastic
    # params are listed in discovery order
    assert template.params == [
        Rand(0, 1),
        Grid(2, 3),
        Rand(10, 20),
        Rand(100, 200),
        Rand(1000, 2000),
    ]
    filled = template.fill([0.5, 2, 10.5, 100.5, 1000.5])
    refilled = template.fill([0.55, 2, 10.55, 100.5, 1000.5])
    assert filled == dict(
        a=0.5,
        b=2,
        c=dict(a=10.5, b=[dict(x=100.5)], c=[1, 1000.5], d=None),
        d=None,
    )
    # each fill produces a fresh object
    assert refilled is not filled
    assert refilled == dict(
        a=0.55,
        b=2,
        c=dict(a=10.55, b=[dict(x=100.5)], c=[1, 1000.5], d=None),
        d=None,
    )
    # extract and fill by dicts
    template = make_template(dict(a=Rand(0, 1), b=dict(x=[Grid(2, 3)])))
    assert dict(p0=Rand(0, 1), p1=Grid(2, 3)) == template.params_dict
    assert dict(a=0.5, b=dict(x=[2])) == template.fill_dict(dict(p1=2, p0=0.5))
    # same express in template: a reused instance maps to a single parameter
    expr = Rand(0, 1)
    template = make_template(dict(a=expr, b=dict(x=expr), c=Rand(2, 4)))
    assert dict(p0=Rand(0, 1), p1=Rand(2, 4)) == template.params_dict
    assert dict(a=0.5, b=dict(x=0.5), c=2) == template.fill_dict(dict(p1=2, p0=0.5))
    # special objects such as dataframes must not break template construction
    template = make_template(dict(a=Rand(0, 1), b=pd.DataFrame([[0]])))

    # FuncParam: value computed from other expressions when filled
    def tf(*args, x):
        return sum(args) + x[0]

    shared = Grid(0, 1)
    template = make_template(
        dict(a=1, b=[FuncParam(tf, Rand(0, 1), shared, x=[shared])])
    )
    assert template.has_grid
    assert template.has_stochastic
    # tf(0.5, 1, x=[1]) -> 0.5 + 1 + 1 == 2.5
    assert dict(a=1, b=[2.5]) == template.fill([0.5, 1])