def test_network_distribution():
    '''Propagate a point distribution through two POMDPs linked by z1 -> u2.'''
    # First system: one input, three states, two observations.
    trans_first = np.array([[0, 0.5, 0.5],
                            [0, 1, 0],
                            [0, 0, 1]])
    obs_first = np.array([[1, 0],
                          [1, 0],
                          [0, 1]])
    first = POMDP([trans_first], [obs_first],
                  input_names=['u1'], state_name='x1', output_name='z1')

    # Second system: two inputs, three states, identity observation matrix.
    trans_second_a = np.array([[0, 1, 0],
                               [0, 1, 0],
                               [0, 0, 1]])
    trans_second_b = np.array([[0, 0, 1],
                               [0, 1, 0],
                               [0, 0, 1]])
    second = POMDP([trans_second_a, trans_second_b], [np.eye(3)],
                   input_names=['u2'], state_name='x2', output_name='z2')

    network = POMDPNetwork([first, second])
    # The output of the first system is fed directly into the second one.
    network.add_connection(['z1'], 'u2', lambda z1: {z1})

    # distribution over u1 x1 x2: all mass on (0, 0, 0)
    D_ux = sparse.COO([[0], [0], [0]], [1], shape=(1, 3, 3))
    D_xz = propagate_network_distribution(network, D_ux)

    # Expected mass: 0.5 at index (1, 1, 0, 1) and 0.5 at index (2, 2, 1, 2).
    D_xz_r = sparse.COO([[1, 2], [1, 2], [0, 1], [1, 2]], [0.5, 0.5],
                        shape=(3, 3, 2, 3))

    np.testing.assert_equal(D_xz.todense(), D_xz_r.todense())
def environment_belief_model(p0, levels, name):
    '''Create a map belief MDP with prior p0 and two intermediate belief levels.

    :param p0: prior belief of the region
    :param levels: indexable with levels[0] = pm and levels[1] = pp, the
                   belief levels reached from the prior by a weak measurement
    :param name: prefix; the input is named name + '_u', the state name + '_b'
    :return: a POMDP. For p0 equal to 0 or 1 it has a single state whose
             output is that value. Otherwise it has five states with outputs
             [0, pm, p0, pp, 1] and three transition matrices, in this order:
             no measurement, weak measurement, strong measurement.
    '''
    input_names = [name + '_u']
    state_name = name + '_b'

    # A prior of exactly 0 or 1 leaves nothing to learn: no dynamics.
    if p0 == 0 or p0 == 1:
        known = 0 if p0 == 0 else 1
        return POMDP([np.array([1])],
                     input_names=input_names,
                     state_name=state_name,
                     input_trans=lambda n: 0,
                     output_trans=lambda s: known)

    pm, pp = levels[0], levels[1]
    belief_of_state = (0, pm, p0, pp, 1)

    # No measurement: the belief state stays where it is.
    no_meas = np.eye(5)

    # Weak measurement: only the prior state (2) moves, to pm (state 1) with
    # probability 1 - p0 or to pp (state 3) with probability p0.
    weak_meas = np.array([[1, 0, 0, 0, 0],
                          [0, 1, 0, 0, 0],
                          [0, 1 - p0, 0, p0, 0],
                          [0, 0, 0, 1, 0],
                          [0, 0, 0, 0, 1]])

    # Strong measurement: every uncertain state jumps to state 4 (output 1)
    # with probability equal to its belief level, otherwise to state 0.
    strong_meas = np.array([[1, 0, 0, 0, 0],
                            [(1 - pm), 0, 0, 0, pm],
                            [(1 - p0), 0, 0, 0, p0],
                            [(1 - pp), 0, 0, 0, pp],
                            [0, 0, 0, 0, 1]])

    def belief_level(s):
        return belief_of_state[s]

    return POMDP([no_meas, weak_meas, strong_meas],
                 input_names=input_names,
                 state_name=state_name,
                 output_trans=belief_level)
def abstract(self, name_prefix):
    '''Build the deterministic grid MDP and store it in self.pomdp.

    The transition list starts with the identity ("stay") and then holds, for
    every grid dimension in order, the matrix that steps one cell in the
    negative direction followed by the matrix that steps one cell in the
    positive direction. Steps that would leave the grid are clamped to the
    boundary cell.

    :param name_prefix: prefix for the input (prefix + '_u'), state
                        (prefix + '_s') and output (prefix + '_x') names
    '''
    grid_shape = self.n_list
    sources = np.arange(self.N)
    unit_weights = np.ones(self.N)

    def neighbours(dim, step):
        # linear index of the cell reached from each state by moving `step`
        # cells along `dim`, saturating at the grid boundary
        coords = list(np.unravel_index(sources, grid_shape))
        coords[dim] = np.clip(coords[dim] + step, 0, grid_shape[dim] - 1)
        return np.ravel_multi_index(tuple(coords), grid_shape)

    def shift_matrix(dim, step):
        targets = neighbours(dim, step)
        return sp.coo_matrix((unit_weights, (sources, targets)),
                             shape=(self.N, self.N))

    T_list = [sp.eye(self.N)]
    for dim in range(len(grid_shape)):
        T_list.append(shift_matrix(dim, -1))
        T_list.append(shift_matrix(dim, 1))

    self.pomdp = POMDP(T_list,
                       input_names=[name_prefix + '_u'],
                       state_name=name_prefix + '_s',
                       output_trans=self.s_to_x,
                       output_name=name_prefix + '_x')
def test_Tuz():
    '''Check POMDP.Tuz for input (0,) and each of the two observations.'''
    transition = np.array([[0, 0.5, 0.5],
                           [0, 1, 0],
                           [0.7, 0, 0.3]])
    observation = np.array([[0.5, 0.5],
                            [0, 1],
                            [1, 0]])
    pomdp = POMDP([transition], [observation])

    # probability of going to s and seeing z, keyed by z
    expected_by_obs = {
        0: np.array([[0, 0, 0.5],
                     [0, 0, 0],
                     [0.35, 0, 0.3]]),
        1: np.array([[0, 0.5, 0],
                     [0, 1, 0],
                     [0.35, 0, 0]]),
    }

    for z, expected in expected_by_obs.items():
        actual = pomdp.Tuz((0,), z).todense()
        np.testing.assert_almost_equal(actual, expected)
def environment_belief_model2(p0, levels, name):
    '''Create a map belief MDP with prior p0 and four intermediate belief levels.

    :param p0: prior belief of the region
    :param levels: indexable with levels[0..3] = (pmm, pm, pp, ppp)
    :param name: prefix; the input is named name + '_u', the state name + '_b'
    :return: a POMDP. For p0 equal to 0 or 1 it has a single state whose
             output is that value. Otherwise it has seven states with outputs
             [0, pmm, pm, p0, pp, ppp, 1] and three transition matrices, in
             this order: no measurement, weak measurement, strong measurement.
    '''
    pmm = levels[0]
    pm = levels[1]
    pp = levels[2]
    ppp = levels[3]

    input_names = [name + '_u']
    state_name = name + '_b'

    # A prior of exactly 0 or 1 has no dynamics. The model is still named
    # after `name`, like the non-degenerate model below, so it can be
    # addressed the same way regardless of the prior.
    if p0 == 0:
        return POMDP([np.array([1])],
                     input_names=input_names,
                     state_name=state_name,
                     input_trans=lambda n: 0,
                     output_trans=lambda s: 0)
    if p0 == 1:
        return POMDP([np.array([1])],
                     input_names=input_names,
                     state_name=state_name,
                     input_trans=lambda n: 0,
                     output_trans=lambda s: 1)

    # No measurement: the belief state stays where it is.
    Tnone = np.eye(7)

    # Weak measurement: only the prior state (3) moves, to pm (state 2) with
    # probability 1 - p0 or to pp (state 4) with probability p0.
    Tweak = np.array([[1, 0, 0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0, 0, 0],
                      [0, 0, 1, 0, 0, 0, 0],
                      [0, 0, (1 - p0), 0, p0, 0, 0],
                      [0, 0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 0, 0, 1]])

    # Strong measurement: states 2-4 move to ppp (state 5) with probability
    # equal to their belief level, otherwise to pmm (state 1).
    Tstrong = np.array([[1, 0, 0, 0, 0, 0, 0],
                        [0, 1, 0, 0, 0, 0, 0],
                        [0, (1 - pm), 0, 0, 0, pm, 0],
                        [0, (1 - p0), 0, 0, 0, p0, 0],
                        [0, (1 - pp), 0, 0, 0, pp, 0],
                        [0, 0, 0, 0, 0, 1, 0],
                        [0, 0, 0, 0, 0, 0, 1]])

    # Exact measurement: states 1-5 resolve to state 6 (output 1) with
    # probability equal to their belief level, otherwise to state 0.
    # NOTE(review): this matrix is built but not passed to the POMDP below, so
    # states 0 and 6 cannot be entered from any other state. Confirm whether
    # it should be a fourth input; left out here to keep the action set
    # unchanged.
    Texact = np.array([[1, 0, 0, 0, 0, 0, 0],
                       [(1 - pmm), 0, 0, 0, 0, 0, pmm],
                       [(1 - pm), 0, 0, 0, 0, 0, pm],
                       [(1 - p0), 0, 0, 0, 0, 0, p0],
                       [(1 - pp), 0, 0, 0, 0, 0, pp],
                       [(1 - ppp), 0, 0, 0, 0, 0, ppp],
                       [0, 0, 0, 0, 0, 0, 1]])

    def output_fcn(s):
        return [0, pmm, pm, p0, pp, ppp, 1][s]

    return POMDP([Tnone, Tweak, Tstrong],
                 input_names=input_names,
                 state_name=state_name,
                 output_trans=output_fcn)
def abstract(self, name_prefix=''):
    '''Represent the graph as an MDP by treating the index of a neighbor as
    the action number; the result is stored in self.mdp.

    :param name_prefix: prefix of the output name (prefix + '_x')
    '''
    sources = range(self.N)
    unit_weights = np.ones(self.N)

    def action_matrix(k):
        # deterministic transition: every node moves to its k-th neighbor
        targets = [self.get_kth_neighbor(node, k) for node in sources]
        return sp.coo_matrix((unit_weights, (sources, targets)),
                             shape=(self.N, self.N))

    T_list = [action_matrix(k) for k in range(self.M)]

    def output_trans(n):
        # the output of node n is its 'xc' attribute in the graph
        return self.G.nodes[n]['xc']

    self.mdp = POMDP(T_list,
                     output_name=name_prefix + '_x',
                     output_trans=output_trans)
def test_ssp_valiter2():
    '''Stochastic shortest path on a single-input chain with an unreachable target.

    State 0 reaches the target state 1 with probability 0.9 per step at unit
    cost, so its value is 1 / 0.9. State 2 is absorbing and is not the target,
    so its value is infinite.
    '''
    T0 = np.array([[0.1, 0.9, 0],
                   [0, 1, 0],
                   [0, 0, 1]])

    network = POMDPNetwork([POMDP([T0])])

    costs = np.ones([1, 3])
    target = np.array([0, 1, 0])

    val, _ = solve_ssp(network, costs, target, M=10)

    # np.inf, not np.Inf: the capitalized alias was removed in NumPy 2.0
    np.testing.assert_almost_equal(val, [1 / 0.9, 0, np.inf], decimal=4)
def test_evaluate_Q():
    '''Evaluate Q for one input/state pair in a two-POMDP network linked by z1 -> u2.'''
    # First system: from state 0 it moves to state 1 or 2 with equal
    # probability; states 0 and 1 emit observation 0, state 2 emits 1.
    trans_first = np.array([[0, 0.5, 0.5],
                            [0, 1, 0],
                            [0, 0, 1]])
    obs_first = np.array([[1, 0],
                          [1, 0],
                          [0, 1]])
    first = POMDP([trans_first], [obs_first],
                  input_names=['u1'], state_name='x1', output_name='z1')

    # Second system: two inputs, identity observation matrix.
    trans_second_a = np.array([[0, 1, 0],
                               [0, 1, 0],
                               [0, 0, 1]])
    trans_second_b = np.array([[0, 0, 1],
                               [0, 1, 0],
                               [0, 0, 1]])
    second = POMDP([trans_second_a, trans_second_b], [np.eye(3)],
                   input_names=['u2'], state_name='x2', output_name='z2')

    network = POMDPNetwork([first, second])
    network.add_connection(['z1'], 'u2', lambda z1: {z1})

    # The value function is 1 only at joint index (2, 2).
    V = np.array([[0, 0, 0],
                  [0, 0, 0],
                  [0, 0, 1]])

    q_value = evaluate_Q(network, (0, ), (0, 0), V)
    np.testing.assert_almost_equal(q_value, 0.5)
def test_evolve():
    '''test non-deterministic connection'''
    step_up = np.array([[0, 1, 0],
                        [0, 0, 1],
                        [0, 0, 1]])
    step_down = np.array([[1, 0, 0],
                          [1, 0, 0],
                          [0, 1, 0]])

    mdp1 = POMDP([step_up, step_down], input_names=['u1'], state_name='x1')
    mdp2 = POMDP([step_up, step_down], input_names=['u2'], state_name='x2')

    network = POMDPNetwork()

    # one system only
    network.add_pomdp(mdp1)
    next_state, _ = network.evolve([0], (0,))
    np.testing.assert_equal(next_state, [1])

    # two independent systems
    network.add_pomdp(mdp2)
    next_state, _ = network.evolve([1, 1], (0, 1))
    np.testing.assert_equal(next_state, [2, 0])

    # u2 is now drawn from {0, 1} by the connection, so only u1 is supplied
    network.add_connection(['x1'], 'u2', lambda x1: set([0, 1]))

    num_draws = 1000
    draws = [network.evolve([1, 1], (0,))[0] for _ in range(num_draws)]

    # the first system is unaffected by the random connection
    for drawn in draws:
        np.testing.assert_equal(drawn[0], 2)

    n0 = sum(1 for drawn in draws if drawn[1] == 0)
    n2 = sum(1 for drawn in draws if drawn[1] == 2)

    # the second system always lands in state 0 or 2, roughly equally often
    np.testing.assert_equal(n0 + n2, num_draws)
    np.testing.assert_array_less(abs(n0 - n2), 100)
def environment_belief_model(p0, name):
    '''Create a map belief MDP with prior p0.

    :param p0: prior belief of the region
    :param name: prefix used for the input, state and output names
    :return: a POMDP. For p0 equal to 0 or 1 it has a single state whose
             output is that value. Otherwise it has three states with outputs
             [0, p0, 1] and two transition matrices: no measurement and
             measurement.
    '''
    input_names = [name + '_u']

    # A prior of exactly 0 or 1 has no dynamics: one state with a fixed output.
    for known in (0, 1):
        if p0 == known:
            return POMDP([np.array([1])],
                         input_names=input_names,
                         state_name=name + '_b',
                         input_trans=lambda n: 0,
                         output_trans=lambda s: known)

    belief_of_state = [0, p0, 1]

    no_meas = np.eye(3)
    # Measuring in the prior state (1) resolves it to state 2 with
    # probability p0, otherwise to state 0.
    meas = np.array([[1., 0, 0],
                     [1 - p0, 0, p0],
                     [0, 0, 1]])

    return POMDP([no_meas, meas],
                 input_names=input_names,
                 state_name=name,
                 output_name=name + '_b',
                 output_trans=lambda s: belief_of_state[s])
def formula_to_pomdp(formula):
    '''convert a co-safe LTL formula to a DFSA represented as a
    special case of a POMDP

    :return: tuple (mdp, init_states, final_states) where mdp has one 0/1
             transition matrix per binary input tuple and state name 'mu',
             and the two sets hold state indices
    '''
    fsa = Fsa()
    fsa.from_formula(formula)
    fsa.add_trap_state()

    num_states = len(fsa.g)
    num_props = len(fsa.props)

    # mapping state -> state index
    index_of = {node: idx for idx, node in enumerate(sorted(fsa.g.nodes()))}

    # all input symbols that label some edge; they must be exactly 0..M-1
    inputs = set.union(*(attr['input'] for _, _, attr in fsa.g.edges(data=True)))
    M = len(inputs)
    assert (inputs == set(range(M)))

    # one empty transition matrix for each of the first M binary tuples
    T = {bits: np.zeros((num_states, num_states))
         for bits, _ in zip(product(range(2), repeat=num_props), range(M))}

    # order the propositions to match the bits of the input symbol
    input_names = sorted(fsa.props.keys(), key=lambda key: -fsa.props[key])

    for src, dst, attr in fsa.g.edges(data=True):
        for u in attr['input']:
            # get binary representation
            m_tuple = tuple(int(bit)
                            for bit in format(u, '0{}b'.format(num_props)))

            # check that input_names are in correct order
            test_props = {prop for prop, bit in zip(input_names, m_tuple) if bit}
            assert u == fsa.bitmap_of_props(test_props)

            T[m_tuple][index_of[src], index_of[dst]] = 1

    mdp = POMDP(T, input_names=input_names, state_name='mu')

    init_states = {index_of[state]
                   for state, key in fsa.init.items() if key == 1}
    final_states = {index_of[state] for state in fsa.final}

    return mdp, init_states, final_states
def test_propagate_distr():
    '''Propagate input/state distributions through a two-input POMDP.'''
    absorb_last = np.array([[0, 0, 1],
                            [0, 0, 1],
                            [0, 0, 1]])
    transitions = {
        (0, 0): np.array([[0, 0.5, 0.5],
                          [0, 1, 0],
                          [0.7, 0, 0.3]]),
        (0, 1): absorb_last,
        (1, 0): np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
        (1, 1): np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
    }
    observation = np.array([[0.5, 0.5],
                            [0, 1],
                            [1, 0]])

    pomdp = POMDP(transitions, [observation], input_names=['u1', 'u2'])

    # Support of the input distributions: (u1, u2, x) in
    # {(0, 0, 0), (0, 1, 0), (1, 1, 0)}; only the weights vary.
    support = [[0, 0, 1], [0, 1, 1], [0, 0, 0]]

    def propagate(weights):
        D_ux = sparse.COO(support, weights, shape=(2, 2, 3))
        return propagate_distribution(pomdp, D_ux).todense()

    # all mass on the first support point
    D1_xz_r = np.array([[0, 0], [0, 0.5], [0.5, 0]])
    np.testing.assert_almost_equal(propagate([1, 0, 0]), D1_xz_r)

    # all mass on the second support point
    D2_xz_r = np.array([[0, 0], [0, 0], [1, 0]])
    np.testing.assert_almost_equal(propagate([0, 1, 0]), D2_xz_r)

    # all mass on the third support point
    D3_xz_r = np.array([[0.5, 0.5], [0, 0], [0, 0]])
    np.testing.assert_almost_equal(propagate([0, 0, 1]), D3_xz_r)

    # a mixture must propagate to the same mixture of the results
    mixture = 0.33 * D1_xz_r + 0.33 * D2_xz_r + 0.34 * D3_xz_r
    np.testing.assert_almost_equal(propagate([0.33, 0.33, 0.34]), mixture)
def test_ssp_valiter1():
    '''Stochastic shortest path on a five-state, two-input problem.'''
    first_input = np.array([[0, 0, 0, 1, 0],
                            [0, 1, 0, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 0, 0, 1, 0],
                            [0, 0, 0, 0, 1]])
    second_input = np.array([[0, 1, 0, 0, 0],
                             [0, 0, 0.5, 0, 0.5],
                             [0, 0, 0, 0, 1],
                             [0, 0, 0, 0, 1],
                             [0, 0, 0, 0, 1]])

    network = POMDPNetwork([POMDP([first_input, second_input])])

    # Unit cost everywhere except: the second input is expensive in states 2
    # and 3, and the target state 4 is free.
    costs = np.array([[1., 1., 1., 1., 0.],
                      [1., 1., 50., 20., 0.]])

    target = np.array([0, 0, 0, 0, 1])

    val, pol = solve_ssp(network, costs, target, M=1000)

    np.testing.assert_almost_equal(val, [21, 26, 50, 20, 0])
def formula_to_logic(formula):
    '''convert a propositional logic formula to a logic gate represented as a
    special case of a POMDP

    The returned POMDP has two states. Under every input tuple the next state
    is independent of the current one: it is state 1 for the inputs labelling
    the edge from the initial to the final FSA state, and state 0 otherwise.
    '''
    fsa = Fsa()
    fsa.from_formula(formula)

    num_props = len(fsa.props)

    # default for every binary input tuple: always go to state 0
    T = {bits: np.array([[1, 0], [1, 0]])
         for bits in product(range(2), repeat=num_props)}

    init_state = next(state for state, key in fsa.init.items() if key == 1)
    final_state = next(iter(fsa.final))

    # order the propositions to match the bits of the input symbol
    input_names = sorted(fsa.props.keys(), key=lambda key: -fsa.props[key])

    accepting_inputs = fsa.g[init_state][final_state]['input']
    for u in accepting_inputs:
        m_tuple = tuple(int(bit)
                        for bit in format(u, '0{}b'.format(num_props)))
        # accepting input: always go to state 1
        T[m_tuple] = np.array([[0, 1], [0, 1]])

    return POMDP(T, input_names=input_names, state_name='_'.join(input_names))
def __init__(self, lti_syst, eta, un=3, T2x=None, MKeps=None):
    '''Construct a grid abstraction of a LTI Gaussian system

    :param lti_syst: A LTI system (noise matrix must be diagonal)
    :param eta: abstraction grid size (one for each dimension)
    :param un: number of discrete inputs per dimension. Inputs are placed on
               the boundary of the input bounding box with spacing
               range / (un - 1), so un = 1 leads to a division by zero
    :param T2x=None: transformation matrix (use for rotated systems for easy
                     access to original coordinates). If None, lti_syst.T2x is
                     used, and the identity if that is None as well
    :param MKeps=None: tuple (M, K, eps) defining a simulation relation.
                       if None one will be computed
    :raises Exception: if the noise matrix lti_syst.W is not diagonal
    '''
    # check that W is diagonal
    if not np.all(lti_syst.W == np.diag(np.diagonal(lti_syst.W))):
        raise Exception('system noise must be diagonal')

    # store state transformation matrix: an explicitly passed T2x wins, then
    # the one attached to the system, then the identity
    if T2x is not None:
        self.T2x = T2x
    elif lti_syst.T2x is not None:
        self.T2x = lti_syst.T2x
    else:
        self.T2x = np.eye(lti_syst.dim)  # identity

    # compute/store simulation relation
    if MKeps is None:
        # box with half-width eta[i] along dimension i
        dist = pc.box2poly(
            np.diag(eta).dot(np.kron(np.ones((lti_syst.dim, 1)),
                                     np.array([[-1, 1]]))))
        self.M, self.K, self.eps = eps_err(lti_syst, dist)
    else:
        self.M = MKeps[0]
        self.K = MKeps[1]
        self.eps = MKeps[2]

    # state discretization information
    lx, ux = pc.bounding_box(lti_syst.X)
    lx = lx.flatten()
    ux = ux.flatten()

    # widen the box so that its width is a multiple of eta; center slack
    remainx = eta - np.remainder(ux - lx, eta)
    lx -= remainx / 2
    ux += remainx / 2

    self.x_low = lx
    self.x_up = ux

    self.eta_list = eta.flatten()
    self.n_list = tuple(
        np.ceil((self.x_up - self.x_low) / self.eta_list).astype(int))

    # save input discretization information: place inputs on boundary
    # NOTE: bounding box may give infeasible inputs..
    lu, uu = pc.bounding_box(lti_syst.U)
    self.u_low = lu.flatten()
    self.m_list = tuple(un for i in range(lti_syst.m))
    self.eta_u_list = (uu.flatten() - self.u_low) / (np.array(self.m_list) - 1)

    # NOTE(review): self.N is not assigned in this method; presumably it is
    # derived from self.n_list by the class -- confirm.
    num_states = self.N

    # extract all transitions; index num_states is one dummy state that
    # collects the probability mass leaving the grid
    transition_list = []
    for ud in range(prod(self.m_list)):
        # the input contribution does not depend on the state
        input_effect = np.dot(lti_syst.b, self.ud_to_u(ud))

        Pmat = np.zeros((num_states + 1, num_states + 1))
        for s in range(num_states):
            s_diag = super(LTIGrid, self).s_to_x(s)
            mean = np.dot(lti_syst.a, s_diag) + input_effect  # Ax + Bu

            P = np.ravel(grid_cdf_nd(mean, lti_syst.W,
                                     self.x_low, self.x_up, self.eta_list))

            Pmat[s, 0:num_states] = P
            Pmat[s, num_states] = 1 - sum(P)

        # the dummy state is absorbing
        Pmat[num_states, num_states] = 1

        transition_list.append(Pmat)

    self.mdp = POMDP(transition_list,
                     input_names=['u_d'],
                     state_name='s',
                     output_trans=lambda s: (s, self.s_to_x(s)),
                     output_name='(s,xc)')