Example #1
def test_agent_planner():
    # Attach a POMCP planner with a 5-step horizon to the agent.
    rs_pomcp = POMCP(rs_type, horizon=5)
    agent.planner = rs_pomcp
    # Step the agent until its planning horizon is exhausted.
    while agent.planning_horizon >= 0:
        obs, reward = agent.execute_action
        print(f'Action: {agent.actions[-1].name}, observation: {obs.name}, reward: {reward}')
    print(f'Total reward accumulated: {agent.agent_type.frame.oc.get_current_reward()}')
Example #2
def best_response(self):
    # Plan for the worker agent with a ToM-zero POMCP planner.
    tom_zero_worker_pomcp = POMCP(
        self.worker_type,
        horizon=self.planning_horizon,
        exploration_bonus=self.worker_agent.exploration_bonus)
    self.worker_agent.planner = tom_zero_worker_pomcp
    action, q_value = self.worker_agent.compute_optimal_policy
    return action, q_value
Example #3
def test_manager_planner():
    # Attach a POMCP planner with a 5-step horizon to the manager agent.
    tom_zero_manager_pomcp = POMCP(manager_type, horizon=5)
    manager.planner = tom_zero_manager_pomcp
    while manager.planning_horizon >= 0:
        obs, reward = manager.execute_action
        print(f'Action: {manager.actions[-1].name}, observation: {obs.name}, reward: {reward}')
        # Stop the episode early when the observation's value flag is set.
        if obs.value:
            break
    print(f'Total reward accumulated: {manager.agent_type.frame.oc.get_current_reward()}')
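Examples #1 and #3 follow the same execute-and-print loop. As a sketch only, built from the agent attributes used above (the run_episode helper name is hypothetical and not part of the library), the pattern could be factored out:

def run_episode(agent, stop_on_terminal=False):
    # Hypothetical helper: drive an agent whose planner is already attached,
    # mirroring the loops in Examples #1 and #3 above.
    while agent.planning_horizon >= 0:
        obs, reward = agent.execute_action
        print(f'Action: {agent.actions[-1].name}, observation: {obs.name}, reward: {reward}')
        if stop_on_terminal and obs.value:
            break
    return agent.agent_type.frame.oc.get_current_reward()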
Example #4
            # Map each interactive state in is_list_cur_level to its entry in initial_belief.
            belief_dict = {
                is_: belief_e
                for belief_e, is_ in zip(initial_belief, is_list_cur_level)
            }  # NOTE: possible source of a bug when the initial belief is <TL, TR>
            for frame_cur_level in frames_cur_level:
                # cur_model = Model.create_or_get_model(belief_dict, frame_cur_level)

                if cur_level == 0:
                    # Level-0 frames plan against the physical environment
                    # alone, so a single-agent POMCP solver suffices.
                    environment = Environment(
                        physical_states, frame_cur_level.observations,
                        frame_cur_level.actions,
                        frame_cur_level.observation_table,
                        frame_cur_level.reward_table,
                        frame_cur_level.transition_table)
                    solver = POMCP(environment)
                else:
                    # Higher levels also model the other agent, so they need a
                    # multi-agent environment and the nested IPOMCP solver.
                    environment = MultiAgentEnvironment(
                        physical_states, frame_cur_level.observations,
                        frame_cur_level.actions,
                        frame_cur_level.observation_table,
                        frame_cur_level.reward_table,
                        frame_cur_level.transition_table)
                    solver = IPOMCP(environment)

                cur_model = Model(belief_dict, "", frame_cur_level, solver)
                model_list_cur_level.append(cur_model)
        if cur_level == 0:
            model_list_0_level = model_list_cur_level
        model_list_prev_level = model_list_cur_level
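As a side note, the environment/solver selection above could be factored into a small helper. This is only a sketch using the constructors already shown (the build_solver name is hypothetical):

def build_solver(frame, physical_states, cur_level):
    # Hypothetical helper: pick the environment and solver for a frame,
    # mirroring the if/else branch in the loop above.
    env_cls = Environment if cur_level == 0 else MultiAgentEnvironment
    environment = env_cls(
        physical_states, frame.observations, frame.actions,
        frame.observation_table, frame.reward_table, frame.transition_table)
    return POMCP(environment) if cur_level == 0 else IPOMCP(environment)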
Example #5
observations = ['GL', 'GR']
sensor_accuracy = 0.85
reward_listen = -1
reward_gold = 10
reward_tiger = -100
noisy = True
beta = None

gamma = 0.9
epsilon = 0.01
horizon = 3

environment = TigerProblem(states, observations, actions, sensor_accuracy,
                           reward_listen, reward_gold, reward_tiger, noisy,
                           beta)
pomcp = POMCP(environment, gamma, epsilon, horizon)

history = ''
initial_belief = {0: 0.5, 1: 0.5}  # uniform prior over the two tiger locations

# Testing the first best action:
# root_node, best_action = pomcp.search(history, initial_belief, i=10000)
# print("Root Node: " + str(root_node))
# print("Best Action: " + str(best_action))
# sys.exit()

# Finite horizon, averaged across multiple episodes:
# num_episodes = 100
# rewards = []
# run for all time-steps
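As a rough sketch only of a finite-horizon evaluation averaged over multiple episodes (the search call reuses the signature from the commented-out line above; the environment-stepping calls are left as a comment because that API is not shown in this snippet):

num_episodes = 100
rewards = []
for _ in range(num_episodes):
    # Plan from the empty history and the uniform initial belief.
    root_node, best_action = pomcp.search(history, initial_belief, i=10000)
    episode_reward = 0.0
    # ... execute best_action, observe, and accumulate returns into
    #     episode_reward for `horizon` steps (stepping API not shown) ...
    rewards.append(episode_reward)
print(f'Average reward over {num_episodes} episodes: {sum(rewards) / num_episodes}')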
Example #6
def test_pomcp():
    # Build an empty root observation node, run a POMCP search with a
    # 3-step horizon, and take one step of the tiger problem with the
    # resulting best-action node.
    root_node = ObservationNode(None, '', '')
    tiger_pomcp = POMCP(tiger_type, horizon=3)
    br_node, br_value = tiger_pomcp.search(root_node)
    obs, reward = tiger_problem.pomdp_step(br_node)
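    # A possible way to report the result, following the print pattern of the
    # earlier examples (sketch; `obs.name` is assumed to behave as in Examples #1 and #3).
    print(f'Best action value: {br_value}')
    print(f'Observation: {obs.name}, reward: {reward}')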