import numpy as np

# Assumes gwg (the gridworld), the MDP class, states, goodtargets and badtargets
# are defined elsewhere.
# states = range(gwg.nstates)
alphabet = [0, 1, 2, 3]  # North, south, west, east

# Flatten the gridworld transition kernel into (state, action, next state, probability) tuples.
transitions = []
for s in states:
    for a in alphabet:
        for t in np.nonzero(gwg.prob[gwg.actlist[a]][s])[0]:
            p = gwg.prob[gwg.actlist[a]][s][t]
            transitions.append((s, alphabet.index(a), t, p))
mdp = MDP(states, set(alphabet), transitions)

# V, goodpolicy = mdp.max_reach_prob(goodtargets, epsilon=0.0001)
# V, badpolicy = mdp.max_reach_prob(badtargets, epsilon=0.0001)

randomness = 0

# Reward 1.0 on transitions that stay inside the good target set, then compute
# the "good" policy by T-step value iteration.
R = dict([(s, a, next_s), 0.0] for s in mdp.states for a in mdp.available(s)
         for next_s in mdp.post(s, a))
R.update([(s, a, next_s), 1.0] for s in mdp.states for a in mdp.available(s)
         for next_s in mdp.post(s, a) if next_s in goodtargets and s in goodtargets)
V, goodpolicy = mdp.T_step_value_iteration(R, 10)

# Same construction for the bad target set.
R = dict([(s, a, next_s), 0.0] for s in mdp.states for a in mdp.available(s)
         for next_s in mdp.post(s, a))
R.update([(s, a, next_s), 1.0] for s in mdp.states for a in mdp.available(s)
         for next_s in mdp.post(s, a) if next_s in badtargets and s in badtargets)
V, badpolicy = mdp.T_step_value_iteration(R, 10)

# Markov chains induced by fixing each policy.
good_MC = mdp.construct_MC(goodpolicy, 'Examples/7x5_good.txt')
bad_MC = mdp.construct_MC(badpolicy, 'Examples/7x5_bad.txt')

# Construct product mdp
states = [(s1, s2) for s1 in gwg.states for s2 in gwg.states]
product_trans = []
for s1 in states:
    for s2 in states:
        for a in alphabet:
# Reward 1.0 for entering targets[0]; policyT is the resulting T-step optimal
# policy, while policyT1 allows every action in every state.
R = dict([(s, a, next_s), 0.0] for s in mdp.states for a in mdp.available(s)
         for next_s in mdp.post(s, a))
R.update([(s, a, next_s), 1.0] for s in mdp.states for a in mdp.available(s)
         for next_s in mdp.post(s, a) if next_s in targets[0] and s not in targets[0])
V, policyT = mdp.T_step_value_iteration(R, T=20)
policyT1 = dict([s, set(range(gwg.nactions))] for s in mdp.states for a in mdp.available(s))
agentbehaviours = [policyT1, policyT]

# Build an MDP whose two "actions" are the behaviours above: each behaviour ab
# places weight w on every gridworld action it allows in state s, and a weight
# controlled by `randomness` on every other action.
transitions = []
for ab in agentbehaviours:
    for s in states:
        transdict = dict([(s, agentbehaviours.index(ab), t), 0.0] for t in states)
        for a in range(gwg.nactions):
            if a in ab[s]:
                w = 1.0 / len(ab[s]) - randomness / (gwg.nstates - len(ab[s]))
            else:
                w = (1.0 / (gwg.nstates - len(ab[s]))) * randomness
            # tempdict = dict([(s, a, t), 0.0] for t in states)
            for t in np.nonzero(gwg.prob[gwg.actlist[a]][s])[0]:
                p = gwg.prob[gwg.actlist[a]][s][t]
                transdict[(s, agentbehaviours.index(ab), t)] += p * w
        for t in states:
            transitions.append((s, agentbehaviours.index(ab), t,
                                transdict[(s, agentbehaviours.index(ab), t)]))
mdp1 = MDP(states, alphabet=range(2), transitions=transitions)
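
# Optional sanity check, a minimal sketch assuming every row of gwg.prob[action]
# is a probability distribution: with randomness = 0 each disallowed action gets
# weight 0, so the outgoing probabilities recorded in `transitions` for every
# (state, behaviour) pair should sum to 1.
from collections import defaultdict

row_mass = defaultdict(float)
for (s, b, t, p) in transitions:
    row_mass[(s, b)] += p
assert all(abs(mass - 1.0) < 1e-6 for mass in row_mass.values())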