Example #1
import numpy as np

# Assumed module layout for the project's other classes, following the
# `from Inference import Inference` convention used in the script below:
from Human import Human
from Inference import Inference
from Robot import Robot
class Navigation:
    def __init__(self, grid_size, discount, robot_state, beta, robot_goal,
                 obs_sizes, true_env):
        self.human = Human(true_env, beta, discount, robot_goal, grid_size)
        robot_env = np.zeros((grid_size[0], grid_size[1]))
        self.robot = Robot(robot_env, robot_state, robot_goal, grid_size,
                           discount)

        robot_action = self.robot.optimal_policy()
        self.infer = Inference(grid_size, discount, robot_state, beta,
                               robot_goal, robot_action, obs_sizes)

        # dstb = self.infer.prior
        # sorted_ind = np.argsort(dstb)
        # highest_theta_ind = sorted_ind[-1]
        # highest_theta = self.infer.thetas[highest_theta_ind]
        # robot_env = highest_theta

    def full_pipeline(self):
        dstb = self.infer.prior
        self.infer.visualizations(dstb)
        while self.robot.state != self.robot.goal:
            print(self.robot.environment)
            # Get the robot's next action as predicted by the human
            robot_action = self.robot.optimal_policy()
            self.infer.robot_action = robot_action

            # Human gives a correction; policy_index 8 means "no correction"
            policy_index = self.human.give_correction(self.robot.state,
                                                      robot_action)
            if policy_index != 8:
                print("Human gave a correction! The correction was",
                      policy_index, ", the robot state was", self.robot.state,
                      ", and the robot action was", robot_action)
            else:
                print("Human gave no correction. The robot state was",
                      self.robot.state, ", and the robot action was",
                      robot_action)
            dstb, elapsed = self.infer.exact_inference(policy_index)
            print("Inference took time", elapsed)

            # The robot's new environment is the MAP estimate of the inference
            highest_theta_ind = np.argmax(dstb)
            highest_theta = self.infer.thetas[highest_theta_ind]
            self.robot.update_env(highest_theta)

            # theta, time = self.robot.update_theta(policy_index, robot_action)
            # self.robot.update_env(theta)
            # print(time)

            # Robot "replans"
            self.robot.move(policy_index)
            self.infer.robot_state = self.robot.state

    def full_pipeline_qmdp(self):
        dstb = self.infer.prior
        self.infer.visualizations(dstb)
        while self.robot.state != self.robot.goal:
            # Get the robot's next action as predicted by the human
            robot_action = self.robot.optimal_policy_qmdp(
                dstb, self.infer.thetas)
            self.infer.robot_action = robot_action

            # Human gives a correction; policy_index 8 means "no correction"
            policy_index = self.human.give_correction(self.robot.state,
                                                      robot_action)
            if policy_index != 8:
                print("Human gave a correction! The correction was",
                      policy_index, ", the robot state was", self.robot.state,
                      ", and the robot action was", robot_action)
            else:
                print("Human gave no correction. The robot state was",
                      self.robot.state, ", and the robot action was",
                      robot_action)
            dstb, elapsed = self.infer.exact_inference(policy_index)
            print("Inference took time", elapsed)

            # The robot's new environment is the MAP estimate of the inference
            highest_theta_ind = np.argmax(dstb)
            highest_theta = self.infer.thetas[highest_theta_ind]
            self.robot.update_env(highest_theta)

            self.robot.move_qmdp(policy_index, dstb, self.infer.thetas)
            self.infer.robot_state = self.robot.state

    def update_goal(self, goal):
        self.human.update_goal(goal)
        self.robot.update_goal(goal)
        self.infer.update_thetas(goal)

    def update_robot_state(self, state):
        self.robot.state = state
        self.infer.robot_state = state
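

# A minimal, hypothetical driver for the Navigation class above. The true
# environment grid, the obstacle encoding (1 = occupied cell), and all
# parameter values are illustrative assumptions; only the constructor
# signature is taken from __init__ above.
if __name__ == "__main__":
    true_env = np.zeros((5, 5))
    true_env[2, 2] = 1  # assumed encoding: mark a single occupied cell
    nav = Navigation(grid_size=[5, 5], discount=0.9, robot_state=[1, 1],
                     beta=10, robot_goal=[2, 4], obs_sizes=[[1, 1], [2, 2]],
                     true_env=true_env)
    nav.full_pipeline()  # or nav.full_pipeline_qmdp() for the QMDP variant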

# --- Standalone script (separate snippet): exercising Inference directly ---
import numpy as np
from Inference import Inference
import time

grid_size = [5, 5]
obs_sizes = [[1, 1], [2, 2]]

infer = Inference(grid_size,
                  discount=0.9,
                  robot_state=[1, 1],
                  beta=10,
                  robot_goal=[2, 4],
                  robot_action=2,
                  obs_sizes=obs_sizes)
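
# Before any corrections arrive, the prior over candidate environments can be
# inspected; visualizations(dstb) is called the same way in
# Navigation.full_pipeline above.
infer.visualizations(infer.prior)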

# One round of exact inference, conditioned on an observed correction
# (policy_index 0; index 8 would mean "no correction").
infer.exact_inference(policy_index=0)

# infer = Inference(grid_size, discount=0.9, robot_state=[4, 0], beta=10,
#                   robot_goal=[2, 4], robot_action=0, obs_sizes=obs_sizes)
#
# infer.exact_inference(policy_index=2)
#
# infer.robot_state = [4, 1]
# infer.robot_action = 2
#
# infer.exact_inference(policy_index=8)
#
# infer.robot_state = [4, 2]
# infer.robot_action = 2
#
# infer.exact_inference(policy_index=8)
#
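
# A live version of the commented-out sequence above: replay a short
# trajectory of (state, action, correction) observations and watch the
# posterior evolve. This assumes exact_inference returns
# (distribution, time), as it does inside Navigation.
infer2 = Inference(grid_size, discount=0.9, robot_state=[4, 0], beta=10,
                   robot_goal=[2, 4], robot_action=0, obs_sizes=obs_sizes)
for state, action, correction in [([4, 0], 0, 2),
                                  ([4, 1], 2, 8),
                                  ([4, 2], 2, 8)]:
    infer2.robot_state = state
    infer2.robot_action = action
    dstb, elapsed = infer2.exact_inference(policy_index=correction)
    print("Posterior after correction", correction, "->", dstb,
          "(inference time:", elapsed, ")")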