Navigation.py
import numpy as np

from Human import Human
from Inference import Inference
from Robot import Robot


class Navigation:
    """Interactive navigation loop: the robot plans in its believed environment,
    the human offers corrections, and exact inference updates the belief over
    candidate environments (thetas)."""

    def __init__(self, grid_size, discount, robot_state, beta, robot_goal, obs_sizes, true_env):
        # The human knows the true environment; the robot starts with an empty map.
        self.human = Human(true_env, beta, discount, robot_goal, grid_size)
        robot_env = np.zeros((grid_size[0], grid_size[1]))
        self.robot = Robot(robot_env, robot_state, robot_goal, grid_size, discount)
        robot_action = self.robot.optimal_policy()
        self.infer = Inference(grid_size, discount, robot_state, beta, robot_goal,
                               robot_action, obs_sizes)

    def full_pipeline(self):
        dstb = self.infer.prior
        self.infer.visualizations(dstb)
        while self.robot.state != self.robot.goal:
            print(self.robot.environment)
            # Get the robot's next action under its current optimal policy.
            robot_action = self.robot.optimal_policy()
            self.infer.robot_action = robot_action
            # The human observes the action and may correct it
            # (policy index 8 means "no correction").
            policy_index = self.human.give_correction(self.robot.state, robot_action)
            if policy_index != 8:
                print("Human gave a correction! The correction was", policy_index,
                      ", the robot state was", self.robot.state,
                      ", and the robot action was", robot_action)
            else:
                print("Human gave no correction. The robot state was",
                      self.robot.state, ", and the robot action was", robot_action)
            # Update the belief over environments from the (non-)correction.
            dstb, time = self.infer.exact_inference(policy_index)
            print("Inference took time", time)
            # The robot's new environment is the MAP estimate of the inference.
            highest_theta = self.infer.thetas[np.argmax(dstb)]
            self.robot.update_env(highest_theta)
            # Robot "replans": it moves, then the inference tracks its new state.
            self.robot.move(policy_index)
            self.infer.robot_state = self.robot.state

    def full_pipeline_qmdp(self):
        # Same loop, but the robot plans with QMDP against the full belief
        # instead of committing to the MAP environment alone.
        dstb = self.infer.prior
        self.infer.visualizations(dstb)
        while self.robot.state != self.robot.goal:
            robot_action = self.robot.optimal_policy_qmdp(dstb, self.infer.thetas)
            self.infer.robot_action = robot_action
            policy_index = self.human.give_correction(self.robot.state, robot_action)
            if policy_index != 8:
                print("Human gave a correction! The correction was", policy_index,
                      ", the robot state was", self.robot.state,
                      ", and the robot action was", robot_action)
            else:
                print("Human gave no correction. The robot state was",
                      self.robot.state, ", and the robot action was", robot_action)
            dstb, time = self.infer.exact_inference(policy_index)
            print("Inference took time", time)
            # Keep the MAP environment as the robot's map, but plan on the belief.
            highest_theta = self.infer.thetas[np.argmax(dstb)]
            self.robot.update_env(highest_theta)
            self.robot.move_qmdp(policy_index, dstb, self.infer.thetas)
            self.infer.robot_state = self.robot.state

    def update_goal(self, goal):
        # Propagate a new goal to the human model, the robot, and the inference.
        self.human.update_goal(goal)
        self.robot.update_goal(goal)
        self.infer.update_thetas(goal)

    def update_robot_state(self, state):
        # Move the robot and keep the inference's view of its state in sync.
        self.robot.state = state
        self.infer.robot_state = state
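

# A minimal usage sketch, not part of the original file. The values below are
# illustrative assumptions: grid_size as (rows, cols), robot_state/robot_goal
# as grid cells, true_env as an occupancy grid of the same shape, and obs_sizes
# as the observation sizes the Inference class expects. Adjust them to match
# the actual Human/Robot/Inference signatures in this repository.
if __name__ == "__main__":
    grid_size = (5, 5)
    true_env = np.zeros(grid_size)
    true_env[2, 2] = 1  # one hypothetical obstacle the robot does not know about
    nav = Navigation(grid_size=grid_size, discount=0.9, robot_state=(0, 0),
                     beta=1.0, robot_goal=(4, 4), obs_sizes=[1],
                     true_env=true_env)
    nav.full_pipeline()         # replan from the MAP environment each step
    # nav.full_pipeline_qmdp()  # or plan against the full belief with QMDP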