class Navigation:
    """End-to-end correction loop coupling a Robot, a simulated Human, and
    an Inference engine.

    The human watches the robot's planned action at each step and may issue
    a correction; exact inference updates a belief distribution over
    candidate environments (``thetas``), and the robot replans against the
    MAP environment (or, in the QMDP variant, against the whole belief).

    NOTE(review): relies on project classes ``Human``, ``Robot`` and
    ``Inference``; their contracts are assumed from how they are used here.
    """

    # Correction index meaning "the human gave no correction".
    NO_CORRECTION = 8

    def __init__(self, grid_size, discount, robot_state, beta,
                 robot_goal, obs_sizes, true_env):
        """Build the human model, the robot, and the inference engine.

        Args:
            grid_size: [rows, cols] of the grid world.
            discount: MDP discount factor shared by all components.
            robot_state: the robot's initial state.
            beta: rationality coefficient for the human model.
            robot_goal: the robot's goal state.
            obs_sizes: candidate obstacle footprints for inference.
            true_env: the ground-truth environment known only to the human.
        """
        self.human = Human(true_env, beta, discount, robot_goal, grid_size)
        # The robot starts with a blank (all-zeros) map and refines it from
        # inference as corrections arrive.
        robot_env = np.zeros((grid_size[0], grid_size[1]))
        self.robot = Robot(robot_env, robot_state, robot_goal, grid_size,
                           discount)
        robot_action = self.robot.optimal_policy()
        self.infer = Inference(grid_size, discount, robot_state, beta,
                               robot_goal, robot_action, obs_sizes)

    def _announce(self, policy_index, robot_action):
        """Print whether the human corrected the robot's proposed action."""
        if policy_index != self.NO_CORRECTION:
            print("Human gave a correction! The correction was", policy_index,
                  ", the robot state was", self.robot.state,
                  ", and the robot action was", robot_action)
        else:
            print("Human gave no correction. The robot state was",
                  self.robot.state, ", and the robot action was",
                  robot_action)

    def _update_belief(self, policy_index):
        """Run exact inference on the (non-)correction and update the robot.

        Points the robot's environment estimate at the MAP theta of the
        posterior and returns the updated belief distribution.
        """
        # `elapsed` (not `time`) so we don't shadow the `time` module.
        dstb, elapsed = self.infer.exact_inference(policy_index)
        print("Inference took time", elapsed)
        # MAP estimate: theta with the highest posterior probability.
        highest_theta = self.infer.thetas[np.argsort(dstb)[-1]]
        self.robot.update_env(highest_theta)
        return dstb

    def full_pipeline(self):
        """Run the correction loop, fully replanning on the MAP environment,
        until the robot reaches its goal."""
        dstb = self.infer.prior
        self.infer.visualizations(dstb)
        while self.robot.state != self.robot.goal:
            print(self.robot.environment)
            # Get the robot's next action as predicted by the human.
            robot_action = self.robot.optimal_policy()
            self.infer.robot_action = robot_action
            # Human gives correction (or NO_CORRECTION).
            policy_index = self.human.give_correction(self.robot.state,
                                                      robot_action)
            self._announce(policy_index, robot_action)
            dstb = self._update_belief(policy_index)
            # Robot "replans": moves per the correction index.
            self.robot.move(policy_index)
            self.infer.robot_state = self.robot.state

    def full_pipeline_qmdp(self):
        """Same loop, but the robot plans with QMDP over the full belief
        rather than committing only to the MAP environment."""
        dstb = self.infer.prior
        self.infer.visualizations(dstb)
        while self.robot.state != self.robot.goal:
            # Get the robot's next action as predicted by the human.
            robot_action = self.robot.optimal_policy_qmdp(dstb,
                                                          self.infer.thetas)
            self.infer.robot_action = robot_action
            # Human gives correction (or NO_CORRECTION).
            policy_index = self.human.give_correction(self.robot.state,
                                                      robot_action)
            self._announce(policy_index, robot_action)
            dstb = self._update_belief(policy_index)
            self.robot.move_qmdp(policy_index, dstb, self.infer.thetas)
            self.infer.robot_state = self.robot.state

    def update_goal(self, goal):
        """Propagate a new goal to the human, robot, and inference engine."""
        self.human.update_goal(goal)
        self.robot.update_goal(goal)
        self.infer.update_thetas(goal)

    def update_robot_state(self, state):
        """Set the robot's state and keep the inference engine in sync."""
        self.robot.state = state
        self.infer.robot_state = state
import numpy as np
from Inference import Inference
import time

# Smoke test for exact inference on a 5x5 grid with two candidate
# obstacle footprints (1x1 and 2x2).
grid_size = [5, 5]
obs_sizes = [[1, 1], [2, 2]]

infer = Inference(grid_size, discount=0.9, robot_state=[1, 1], beta=10,
                  robot_goal=[2, 4], robot_action=2, obs_sizes=obs_sizes)
infer.exact_inference(policy_index=0)

# Earlier experiment, kept for reference: a three-step rollout from [4, 0]
# with one correction followed by two "no correction" (index 8) updates.
#
# infer = Inference(grid_size, discount=0.9, robot_state=[4, 0], beta=10,
#                   robot_goal=[2, 4], robot_action=0, obs_sizes=obs_sizes)
# infer.exact_inference(policy_index=2)
#
# infer.robot_state = [4, 1]
# infer.robot_action = 2
# infer.exact_inference(policy_index=8)
#
# infer.robot_state = [4, 2]
# infer.robot_action = 2
# infer.exact_inference(policy_index=8)