def test_sample_smoke():
    """Smoke test: a policy simulates a horizon-length action sequence."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    controller = policy(concrete, 3)
    trace = controller.simulate()
    assert len(trace) == 3
    assert isinstance(concrete.accepts(trace), bool)
def test_prefix_tree():
    """Build a prefix tree from two encoded demonstrations and check that the
    demo log-likelihood and satisfaction probability behave as expected."""
    spec, sys = scenario_reactive()
    # Two horizon-3 demonstration traces over the joint (sys, env) inputs.
    encoded = [
        [act(True, True), act(True, True), act(True, True)],
        [act(True, True), act(False, True), act(False, False)],
    ]

    def to_demo(encoded_trc):
        # Pair each input with the post-step state from simulating the circuit.
        io_seq = zip(encoded_trc, sys.aigbv.simulate(encoded_trc))
        for inputs, (_, state) in io_seq:
            # Keep only the system-controlled inputs.
            inputs = fn.project(inputs, sys.inputs)
            yield inputs, state

    # Technical debt where sys_actions and env_actions
    # are two different lists.
    demos = [list(zip(*to_demo(etrc))) for etrc in encoded]
    tree = prefix_tree(sys, demos)
    tree.write_dot('foo.dot')  # NOTE(review): writes into the CWD — side effect.
    cspec = concretize(spec, sys, 3)
    ctrl = fit(cspec, 0.7, bv=True)
    # Log-probabilities are strictly negative for non-trivial distributions.
    lprob = tree.log_likelihood(ctrl, actions_only=True)
    assert lprob < 0
    # Exactly one of the two demos satisfies the spec.
    assert tree.psat(cspec) == 1 / 2
    # Including environment moves can only lower the likelihood.
    lprob2 = tree.log_likelihood(ctrl, actions_only=False)
    assert lprob2 < lprob
def test_long_horizon():
    """A 20-step horizon still admits a policy fit to the target psat."""
    # TODO: exercise more scenarios at long horizons.
    scenarios = (scenario1, scenario_reactive)
    for make_scenario in scenarios:
        spec, mdp = make_scenario()
        concrete = concretize(spec, mdp, 20)
        fitted = fit(concrete, 0.96)
        assert fitted.psat == pytest.approx(0.96)
def test_flatten():
    """flatten/unflatten round-trip a trace through its bit-level encoding."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    trace = [act(True, True), act(True, False), act(True, True)]
    bit_encoding = concrete.flatten(trace)
    assert bit_encoding == [True, True, True, False, True, True]
    assert concrete.unflatten(bit_encoding) == trace
def test_psat_monotonicity(scenario):
    """Increasing the policy parameter never decreases satisfaction probability."""
    spec, mdp = scenario()
    concrete = concretize(spec, mdp, 3)
    previous = 0
    for rounds in range(10):
        current = policy(concrete, rounds).psat
        assert previous <= current
        previous = current
def test_bv_policy():
    """BVPolicy wraps a fitted controller and evaluates a bit-vector input."""
    spec, mdp = scenario_reactive()
    concrete = concretize(spec, mdp, 3)
    fitted = fit(concrete, 0.96)
    qdd = concrete._as_dfa(qdd=True)
    # Smoke check: probability query at the QDD start node should not raise.
    BVPolicy(fitted).prob(qdd.start, {'a': (False, False)})
def test_nx2qdd():
    """The QDD form of the spec graph is a DAG with the expected size."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    graph, root, _ = spec2graph(concrete, qdd=True)
    assert nx.is_directed_acyclic_graph(graph)
    assert len(graph.nodes) == 12 + 4
    assert len(graph.edges) == 22
    # Decision-diagram nodes branch on one bit: fan-out at most 2.
    assert all(graph.out_degree[v] <= 2 for v in graph.nodes)
def test_policy_markov_chain():
    """The induced chain is row-stochastic, with states 0 and 1 absorbing."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    transition, _ = policy(concrete, 3).stochastic_matrix()
    # Self-loop probability 1 means the state is absorbing.
    assert transition[0, 0] == 1
    assert transition[1, 1] == 1
    row_totals = transition.sum(axis=1)
    assert np.allclose(row_totals, np.ones_like(row_totals))
def test_spec2graph():
    """The BDD form of the spec graph is a DAG with the expected size."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    graph, root, _ = spec2graph(concrete)
    assert nx.is_directed_acyclic_graph(graph)
    # BDD size.
    assert len(graph.nodes) == 10
    assert len(graph.edges) == 16
    # Decision-diagram nodes branch on one bit: fan-out at most 2.
    assert all(graph.out_degree[v] <= 2 for v in graph.nodes)
def test_abstract_trace():
    """The abstract trace descends monotonically in (level, -debt) order,
    stabilizing only at the terminal false node."""
    spec, sys = scenario_reactive()
    cspec = concretize(spec, sys, 3)
    actions = [act(True, True), act(True, False), act(True, True)]
    trc = list(cspec.abstract_trace(actions))
    # Walk consecutive pairs of abstract states.
    for prev, curr in fn.rest(fn.with_prev(trc)):
        if prev == curr:
            # Repeated states only occur once the trace has bottomed out
            # at the rejecting sink (manager.false) at level 6.
            assert prev.node.level == 6
            assert prev.node == cspec.manager.false
        else:
            clvl, cdebt = curr.node.level, curr.debt
            plvl, pdebt = prev.node.level, prev.debt
            # Strict progress: level decreases, or debt increases at the
            # same level (debt is negated so the tuple compare enforces this).
            assert (clvl, -cdebt) < (plvl, -pdebt)
def test_concretize():
    """toggle flips acceptance of a trace while preserving the input map."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    trace = [act(True, True), act(True, False), act(True, True)]
    assert not concrete.accepts(trace)
    toggled = concrete.toggle(trace)
    assert toggled.accepts(trace)
    assert toggled.imap == concrete.imap
    assert set(concrete.imap.keys()) == {'a'}
    assert set(concrete.emap.keys()) == {'c'}
def test_reweighted():
    """Re-weighting the environment coin yields the expected graph size."""
    spec, sys = scenario_reactive()
    # HACK: re-weight the coin by composing a (1, 4)-biased coin in front of
    # the original circuit, sinking its '##valid' wire.
    reweighted = C.coin((1, 4), 'c') >> C.MDP(sys.aigbv >> BV.sink(1, ['##valid']))
    concrete = concretize(spec, reweighted, 3)
    graph, root, _ = spec2graph(concrete)
    assert nx.is_directed_acyclic_graph(graph)
    assert len(graph.nodes) == 12
    assert len(graph.edges) == 20
    # Decision-diagram nodes branch on one bit: fan-out at most 2.
    assert all(graph.out_degree[v] <= 2 for v in graph.nodes)
def test_policy():
    """policy() supports curried and direct calls and improves monotonically."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    controllers = [policy(concrete)(3), policy(concrete, 3)]
    for ctrl in controllers:
        assert 0 <= ctrl.psat <= 1
        dists = ctrl.ref2action_dist
        assert len(dists) == 5
        for dist in dists.values():
            assert len(dist) == 2
            assert sum(dist.values()) == pytest.approx(1)

    curried = policy(concrete)
    # Agent gets monotonically more optimal as the parameter grows.
    psats = [curried(rationality).psat for rationality in range(10)]
    assert all(x >= y for x, y in fn.with_prev(psats, 0))
def test_policy_markov_chain_psat():
    """Forward and backward matrix powers both recover the target psat."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    adj, _ = fit(concrete, 0.7).stochastic_matrix()

    n_states = adj.shape[0]
    # Indicator vectors: index 2 is the root, index 1 the accepting sink
    # (matching the ordering asserted in test_policy_markov_chain).
    root_vec = sp.sparse.csr_matrix((n_states, 1))
    root_vec[2] = 1
    true_vec = sp.sparse.csr_matrix((n_states, 1))
    true_vec[1] = 1

    n_steps = concrete.order.horizon * concrete.order.total_bits

    # Forward pass: push the root distribution through n_steps transitions.
    vec = root_vec.T
    for _ in range(n_steps):
        vec = vec @ adj
    assert (vec @ true_vec).todense() == pytest.approx(0.7)

    # Backward pass: pull the accepting indicator back to the root.
    vec = true_vec
    for _ in range(n_steps):
        vec = adj @ vec
    assert (root_vec.T @ vec).todense() == pytest.approx(0.7)
def score(spec):
    """Concretize *spec*, fit a policy, and report timing/size statistics.

    NOTE(review): relies on ``mdp``, ``horizon``, ``psat``, and ``tree``
    from an enclosing scope not visible here — presumably this is a nested
    benchmark helper; confirm at the definition site.

    Returns the log-likelihood of the demonstrations (actions only) under
    the fitted controller.
    """
    start_time = time.time()
    times = {}  # Wall-clock duration of each phase, keyed by phase name.
    print("concretizing spec")
    cspec = concretize(spec, mdp, horizon)
    print("done spec")
    times["build spec"] = time.time() - start_time
    # If no target satisfaction probability was supplied, match the
    # empirical rate observed in the demonstration prefix tree.
    if psat is None:
        sat_prob = tree.psat(cspec)
    else:
        sat_prob = psat
    start_time = time.time()
    print("fitting policy")
    ctrl = fit(cspec, sat_prob, bv=True)
    print("done fitting")
    times["fit"] = time.time() - start_time
    start_time = time.time()
    print("compute log likelihood of demos")
    lprob = tree.log_likelihood(ctrl, actions_only=True)
    times["surprise"] = time.time() - start_time
    print("\n----------------------------\n")
    print(f"BDD size: {cspec.bexpr.dag_size}")
    print(f"Controller Size: {ctrl.size}")
    print(f"log_prob: {lprob}")
    print("\n".join(f"{key}: {val:.2}s" for key, val in times.items()))
    print("\n----------------------------\n")
    print(times)
    return lprob
def test_fit():
    """fit() achieves the requested satisfaction probability."""
    spec, sys = scenario_reactive()
    concrete = concretize(spec, sys, 3)
    fitted = fit(concrete, 0.7)
    assert fitted.psat == pytest.approx(0.7)