Example #1
    def __init__(self, states, actions, observations, observation_function,
                 transition_function, reward_function, belief_points, gamma):
        POMDP.__init__(self, states, actions, observations,
                       observation_function, transition_function,
                       reward_function, gamma)
        self.belief_points = belief_points
        self.t = 0
        self.compute_gamma_reward()
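Examples #1-#3 all delegate to the same base constructor. A minimal sketch of what that POMDP base class likely stores (attribute names and comments are assumptions, not taken from these projects):

class POMDP:
    def __init__(self, states, actions, observations, observation_function,
                 transition_function, reward_function, gamma):
        self.states = states                              # state space S
        self.actions = actions                            # action space A
        self.observations = observations                  # observation space Z
        self.observation_function = observation_function  # O(o | s', a)
        self.transition_function = transition_function    # T(s' | s, a)
        self.reward_function = reward_function            # R(s, a)
        self.gamma = gamma                                # discount factor in (0, 1)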
Example #2
    def __init__(self, states, actions, observations, observation_function,
                 transition_function, reward_function, gamma, pruning_beliefs):

        POMDP.__init__(self, states, actions, observations,
                       observation_function, transition_function,
                       reward_function, gamma)
        self.compute_gamma_reward()
        self.t = 0
        self.pruning_beliefs = pruning_beliefs
Example #3
    def __init__(self,
                 states,
                 actions,
                 observations,
                 observation_function,
                 transition_function,
                 reward_function,
                 gamma,
                 initial_belief,
                 c=0.5):

        POMDP.__init__(self, states, actions, observations,
                       observation_function, transition_function,
                       reward_function, gamma)

        self.tree = Tree()
        self.initial_belief = initial_belief
        self.c = c
        self.a_selected = None
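The Tree, initial_belief, and exploration constant c suggest a Monte-Carlo tree search solver in the style of POMCP. A minimal sketch of UCB1 action scoring, which is how such solvers typically use c (the function and the visit counts are illustrative assumptions):

import math

def ucb1_score(q_value, n_parent, n_action, c=0.5):
    # Unvisited actions are tried first; otherwise trade off the current
    # action-value estimate against an exploration bonus that shrinks as
    # the action is sampled more often.
    if n_action == 0:
        return float('inf')
    return q_value + c * math.sqrt(math.log(n_parent) / n_action)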
Example #4
    def __init__(self,
                 num_players,
                 values,
                 T_fail=0,
                 decay_rate=1,
                 discount_factor=.5,
                 num_round=25,
                 search_max=120,
                 depth=2,
                 win_value=0):
        self.decay = decay_rate
        self.discount = discount_factor
        self.N = num_players
        self.values = values
        self.depth = depth
        self.win_value = win_value
        alpha_max = search_max + num_round * (self.N + 1)
        beta_max = search_max + num_round * (self.N + 1)
        self.T = np.zeros([num_round - 1, alpha_max, beta_max, 2])
        # Value function (maximum expected reward)
        self.V = np.zeros([alpha_max, beta_max, num_round])
        # Maximum expected reward of each action
        self.Q = np.zeros([alpha_max, beta_max, num_round, 2])
        # Policy function (best action)
        self.P = np.zeros([alpha_max, beta_max, num_round])
        if depth == 2:
            self.preQ1 = POMDP.loadPomdpQVals(num_players, [1.5, 2.5], T_fail,
                                              decay_rate, discount_factor,
                                              num_round, search_max, win_value)
            self.preQ0 = POMDP.loadPomdpQVals(num_players, [2.5, 1.5], T_fail,
                                              decay_rate, discount_factor,
                                              num_round, search_max, win_value)
        else:
            self.preQ1 = IPOMDP.loadPomdpQVals(num_players, [1.5, 2.5], T_fail,
                                               decay_rate, discount_factor,
                                               num_round, search_max,
                                               depth - 1, win_value)
            self.preQ0 = IPOMDP.loadPomdpQVals(num_players, [2.5, 1.5], T_fail,
                                               decay_rate, discount_factor,
                                               num_round, search_max,
                                               depth - 1, win_value)
        self.findAllValues()
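For a sense of the table sizes this constructor allocates, a worked example using the signature's default arguments (the player count is a hypothetical choice):

num_players, num_round, search_max = 2, 25, 120
alpha_max = search_max + num_round * (num_players + 1)  # 120 + 25 * 3 = 195
# T has shape (24, 195, 195, 2); V and P have shape (195, 195, 25);
# Q has shape (195, 195, 25, 2).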
Example #5
def MinMDPTest():
    score = 0
    with open('tests/MinMDP', 'r') as f:
        contents = [x.strip() for x in f.readlines() if not x.isspace()]
    for line in contents:
        if line.startswith('#'):
            print(line)
        elif line.startswith('Environment'):
            model_name = line.split()[1]
            model_file = 'examples/env/' + model_name + '.pomdp'
            print('Environment:', model_name)
            pomdp = POMDP(model_file)
            min_mdp = MinMDP(pomdp, .01)
        elif line.startswith('Belief'):
            pieces = [x for x in line.split() if x.find(':') == -1]
            belief = np.array([float(x) for x in pieces])
            print('Belief =', belief)
        elif line.startswith('Value'):
            value = float(line.split()[1])
            ans_value = min_mdp.getValue(belief)
            print("Value by MinMDP:", value, "Your answer:", ans_value)
        elif line.startswith('Action'):
            action = int(line.split()[1])
            ans_action = min_mdp.chooseAction(belief)
            print("Action by MinMDP:", action, "Your answer:", ans_action)
            if abs(ans_value - value) < .01 and action == ans_action:
                score += 1
                print("PASS")
            else:
                print("FAIL")
        elif line.startswith('Runs'):
            num_runs = int(line.split()[1])
            ans_total_reward = min_mdp.evaluate(num_runs)
        elif line.startswith('Reward'):
            total_reward = float(line.split()[1])
            print("Reward by MinMDP:", total_reward, "Your answer:", ans_total_reward)
        elif line.startswith('Error'):
            error = float(line.split()[1])
            if abs(total_reward - ans_total_reward) < error:
                score += 1
                print("PASS")
            else:
                print("FAIL")
        else:
            raise Exception("Unrecognized line: " + line)
    print("Total score out of 3:", score)
    return score
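For reference, a hypothetical tests/MinMDP file this parser would accept (every value below is illustrative, not taken from the real test suite, which presumably chains several such blocks to reach the advertised score of 3):

# MinMDP test case
Environment Tiger
Belief: 0.5 0.5
Value -16.0
Action 0
Runs 100
Reward -15.0
Error 5.0

Comment lines are echoed, tokens containing ':' are dropped before the belief distribution is parsed, and the 'Action' and 'Error' branches each award one point.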
Example #6
def AEMS2Test():
    score = 0
    with open('tests/AEMS2', 'r') as f:
        contents = [x.strip() for x in f.readlines() if not x.isspace()]
    maxStart = time.time()
    print("start time", maxStart)
    for i, line in enumerate(contents):
        if line.startswith('#'):
            print(line)
        elif line.startswith('Environment'):
            model_name = line.split()[1]
            print('Environment:', model_name)
            model_file = 'examples/env/' + model_name + '.pomdp'
            pomdp = POMDP(model_file)
            qmdp = QMDP(pomdp, .01)
            m_mdp = MinMDP(pomdp, .01)
        elif line.startswith('Time'):
            time_limit = float(line.split()[1])
            print("Time limit (sec):", time_limit)
        elif line.startswith('Runs'):
            start = time.time()
            print("start time ", i, "=", start)
            num_runs = int(line.split()[1])
            sum_reward = 0
            for run in range(num_runs):
                solver = AEMS2(pomdp, m_mdp, qmdp, .01, time_limit)
                sum_reward += OnlineSolver.solve(solver)
            ans_total_reward = sum_reward / num_runs
            print(ans_total_reward)
        elif line.startswith('Reward'):
            total_reward = float(line.split()[1])
            print("Minimum required reward:", total_reward, "Your answer:", ans_total_reward)
            if total_reward <= ans_total_reward:
                score += 4
                print("PASS")
            else:
                print("FAIL")
            print("time: ", i, "=", time.time() - start)
        else:
            raise Exception("Unrecognized line: " + line)
    print("time:", time.time() - maxStart)
    print("Total score out of 8:", score)
    return score
Example #7
good_MC = mdp.construct_MC(goodpolicy, 'Examples/7x5_good.txt')
bad_MC = mdp.construct_MC(badpolicy, 'Examples/7x5_bad.txt')

# Construct the product MDP with the bad-policy Markov chain
states = [(s1, s2) for s1 in gwg.states for s2 in gwg.states]
product_trans = []
for s1 in states:
    for s2 in states:
        for a in alphabet:
            p1 = gwg.prob[gwg.actlist[a]][s1[0]][s2[0]]
            p2 = bad_MC[(s1[1], s2[1])]
            if p1 * p2 > 0:
                product_trans.append((s1, a, s2, p1 * p2))

product_mdp = MDP(states, set(alphabet), product_trans)
product_pomdp = POMDP(product_mdp, gwg)
product_mdp.write_to_file('Examples/7x5_productmdp_bad', (30, 4))
product_pomdp.write_to_file('Examples/7x5_productpomdp_bad', (30, 4))

# Construct the product MDP with the good-policy Markov chain
states = [(s1, s2) for s1 in gwg.states for s2 in gwg.states]
product_trans2 = []
for s1 in states:
    for s2 in states:
        for a in alphabet:
            p1 = gwg.prob[gwg.actlist[a]][s1[0]][s2[0]]
            p2 = good_MC[(s1[1], s2[1])]
            if p1 * p2 > 0:
                product_trans2.append((s1, a, s2, p1 * p2))

product_mdp2 = MDP(states, set(alphabet), product_trans2)
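The two loops above are identical except for the Markov chain they multiply in; a sketch of a shared helper (the function name is hypothetical):

def build_product_transitions(gwg, chain, alphabet, states):
    # Pair gridworld transition probabilities with the policy-induced
    # Markov chain and keep only the nonzero product transitions.
    trans = []
    for s1 in states:
        for s2 in states:
            for a in alphabet:
                p1 = gwg.prob[gwg.actlist[a]][s1[0]][s2[0]]
                p2 = chain[(s1[1], s2[1])]
                if p1 * p2 > 0:
                    trans.append((s1, a, s2, p1 * p2))
    return trans

With it, product_trans = build_product_transitions(gwg, bad_MC, alphabet, states) and product_trans2 = build_product_transitions(gwg, good_MC, alphabet, states).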
Example #8
time_horizon = 20

pz = 0.7
pu3 = 0.8

lava_cost_f = -100
lava_cost_b = -50
door_cost = 100
u3_cost = -1

samples = 1000

planner = POMDP(pz,
                pu3,
                lava_cost_f,
                lava_cost_b,
                door_cost,
                u3_cost,
                samples,
                prune=True)

for T in range(time_horizon):
    obs = planner.observe()
    pred = planner.predict()

#print(pred)

book_lines = np.array([[door_cost, lava_cost_f], [lava_cost_b, door_cost]])
plt.figure()
plt.plot([0, 1], book_lines.T, 'r--')
plt.plot([planner.prob_turn_start, planner.prob_turn_start], [-500, 500], 'g:')
plt.plot([planner.prob_turn_end, planner.prob_turn_end], [-500, 500], 'g:')
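The rows of book_lines act as alpha vectors for a two-state problem: the value of committing to one action is linear in the belief, and the optimal value function is the upper envelope of those lines. A sketch of plotting that envelope (which state the belief axis indexes is an assumption):

import numpy as np
import matplotlib.pyplot as plt

b = np.linspace(0, 1, 101)            # belief in the second state (assumed axis)
beliefs = np.stack([1 - b, b])        # shape (2, 101): full two-state beliefs
values = book_lines @ beliefs         # each row: expected value of one fixed action
plt.plot(b, values.max(axis=0), 'k')  # upper envelope = optimal value function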
Example #9
from pomdp import POMDP
from environment import Environment
from onlineSolver import OnlineSolver
from offlineSolver import OfflineSolver
from policyReader import PolicyReader
from aems import AEMS2
from mdpSolver import QMDP, MinMDP

import sys

if len(sys.argv) == 5:
    # offline solver
    model_name = sys.argv[2]
    model_file = 'examples/env/' + model_name + '.pomdp'
    pomdp = POMDP(model_file)
    num_runs = int(sys.argv[3])
    precision = float(sys.argv[4])
    if sys.argv[1] == "QMDP":
        solver = QMDP(pomdp, precision)
    elif sys.argv[1] == "MinMDP":
        solver = MinMDP(pomdp, precision)
    else:
        raise Exception("Invalid offline solver: ", sys.argv[1])
    print("Average reward: ", solver.evaluate(num_runs))

elif len(sys.argv) == 8:
    # online solver
    if sys.argv[1] != "AEMS2":
        raise Exception("Invalid online solver: ", sys.argv[1])
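A hypothetical invocation of the offline branch, matching the argument layout above (the script name main.py is assumed):

# python main.py QMDP Tiger 100 0.01
#   argv[1] = solver name (QMDP or MinMDP)
#   argv[2] = environment name under examples/env/
#   argv[3] = number of evaluation runs
#   argv[4] = solver precision

The eight-argument online branch is only partially shown here, so its remaining arguments are left unspecified.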
Example #10
#!/usr/bin/env python3
from pomdp import POMDP
import matplotlib.pyplot as plt

# ------------------------------------------------------------------
# Summary:
# Example implementation of the partially observable Markov decision
# process (POMDP) class for a simplified two-state robot model.

if __name__ == "__main__":
    pomdp = POMDP()  # POMDP algorithm object
    pomdp.CreateValueMap()
    pomdp.Play()
    plt.show()