# Assumed context for these excerpts: `np` is numpy, `LA` is numpy.linalg, and
# `util`, `map_utils`, `IGL`, and `Policy` are project-local modules.

def step(self, action):
    self.agent.update(action, self.target.state)
    # Update the true target state.
    self.target.update()
    # Observe.
    measurements = self.agent.observation(self.target.target)
    obstacles_pt = map_utils.get_cloest_obstacle(self.MAP, self.agent.state)
    # Update the belief of the agent on the target using KF.
    GaussianBelief = IGL.MultiTargetFilter(measurements, self.agent.agent,
                                           debug=False)
    self.agent.update_belief(GaussianBelief)
    self.belief_target.update(self.agent.get_belief_state(),
                              self.agent.get_belief_cov())
    reward, done, test_reward = self.get_reward(obstacles_pt,
                                                measurements[0].validity,
                                                self.is_training)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)
    r_b, alpha_b, _ = util.relative_measure(self.agent.get_belief_state()[:2],
                                            self.agent.state)
    # Believed target velocity rotated into the agent's body frame.
    rel_target_vel = util.coord_change2b(self.agent.get_belief_state()[2:],
                                         alpha_b + self.agent.state[-1])
    self.state = np.array([
        r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
        np.log(LA.det(self.agent.get_belief_cov())),
        float(measurements[0].validity), obstacles_pt[0], obstacles_pt[1]
    ])
    return self.state, reward, done, {'test_reward': test_reward}
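# --- Sketch (not from the source): the geometry helpers used throughout these
# methods. `util.relative_measure`, `util.coord_change2b`, and
# `util.wrap_around` are assumed to behave as below, consistent with how they
# are called: relative_measure returns range, body-frame bearing, and the
# global xy-offset; coord_change2b rotates a global 2D vector into a frame
# rotated by `theta`.
import numpy as np

def wrap_around(angle):
    # Map an angle into [-pi, pi).
    return (angle + np.pi) % (2 * np.pi) - np.pi

def relative_measure(target_state, agent_state):
    # Range and body-frame bearing from an agent pose [x, y, theta] to a
    # target state whose first two entries are its position.
    diff = np.asarray(target_state[:2]) - np.asarray(agent_state[:2])
    r = np.linalg.norm(diff)
    alpha = wrap_around(np.arctan2(diff[1], diff[0]) - agent_state[2])
    return r, alpha, diff

def coord_change2b(vec, theta):
    # Rotate a global 2D vector into a frame rotated by theta.
    c, s = np.cos(theta), np.sin(theta)
    return np.array([c * vec[0] + s * vec[1], -s * vec[0] + c * vec[1]])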
def reset(self, init_random=True):
    self.state = []
    if init_random:
        if self.MAP.map is None:
            a_init = self.agent_init_pos[:2]
            self.agent.reset(self.agent_init_pos)
        else:
            isvalid = False
            while not isvalid:
                a_init = (np.random.random((2,)) *
                          (self.MAP.mapmax - self.MAP.mapmin) + self.MAP.mapmin)
                isvalid = not map_utils.is_collision(self.MAP, a_init,
                                                     MARGIN2WALL)
            self.agent.reset([a_init[0], a_init[1],
                              np.random.random() * 2 * np.pi - np.pi])
        for i in range(self.num_targets):
            t_init = np.load("path_sh_%d.npy" % i)[0][:2]
            self.belief_targets[i].reset(
                init_state=np.concatenate(
                    (t_init + 10 * (np.random.rand(2) - 0.5), np.zeros(2))),
                init_cov=self.target_init_cov)
            self.targets[i].reset(
                np.concatenate((t_init, self.target_init_vel)))
            r, alpha, _ = util.relative_measure(self.belief_targets[i].state,
                                                self.agent.state)
            logdetcov = np.log(LA.det(self.belief_targets[i].cov))
            self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
    self.state.extend([self.sensor_r, np.pi])
    self.state = np.array(self.state)
    return self.state
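# --- Sketch (assumption): the "path_sh_%d.npy" files loaded above are taken to
# hold a (T, >=2) array of target states, since reset() reads row 0, columns
# 0:2 as the initial position. A hypothetical generator for straight-line
# paths, only to illustrate the expected file shape:
import numpy as np

for i in range(2):  # placeholder for num_targets
    start = np.random.uniform(0.0, 10.0, size=2)
    heading = np.random.uniform(-np.pi, np.pi)
    steps = np.arange(100)[:, None] * 0.1
    path = start + steps * np.array([np.cos(heading), np.sin(heading)])
    np.save("path_sh_%d.npy" % i, path)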
def step(self, action):
    action_val = self.action_map[action]
    boundary_penalty = self.agent.update(action_val,
                                         [t.state[:2] for t in self.targets])
    obstacles_pt = map_utils.get_cloest_obstacle(self.MAP, self.agent.state)
    observed = []
    for i in range(self.num_targets):
        self.targets[i].update(self.agent.state[:2])
        # Observe.
        obs = self.observation(self.targets[i])
        observed.append(obs[0])
        # Update the belief of the agent on the target using KF.
        self.belief_targets[i].update(obs[0], obs[1], self.agent.state)
    reward, done, test_reward = self.get_reward(obstacles_pt, observed,
                                                self.is_training)
    self.state = []
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)
    for i in range(self.num_targets):
        r_b, alpha_b, _ = util.relative_measure(self.belief_targets[i].state,
                                                self.agent.state)
        rel_target_vel = util.coord_change2b(self.belief_targets[i].state[2:],
                                             alpha_b + self.agent.state[-1])
        self.state.extend([
            r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
            np.log(LA.det(self.belief_targets[i].cov)), float(observed[i])
        ])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)
    return self.state, reward, done, {'test_reward': test_reward}
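# Note: each step()/reset() variant in this file flattens the observation as
# [r, alpha, v_x^body, v_y^body, log det(Sigma), observed] per target, followed
# by the polar coordinates (distance, bearing) of the closest obstacle, with
# (sensor_r, pi) used as the "no obstacle in range" placeholder.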
def observation(self, target):
    r, alpha, _ = util.relative_measure(target.state,
                                        self.agent.state)  # true value
    observed = (r <= self.sensor_r) \
        and (abs(alpha) <= self.fov / 2 / 180 * np.pi) \
        and not map_utils.is_blocked(self.MAP, self.agent.state, target.state)
    z = None
    if observed:
        z = np.array([r, alpha])
        z += np.random.multivariate_normal(np.zeros(2,),
                                           self.observation_noise(z))
    return observed, z
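# --- Sketch (assumption): `self.observation_noise(z)` above only needs to
# return a 2x2 covariance for z = [r, alpha]. A common range-bearing model
# uses independent range and bearing noise with range variance growing with
# distance; the sigma values here are placeholders, not the source's.
import numpy as np

def observation_noise(self, z, sigma_r=0.2, sigma_b=0.01):
    # Diagonal measurement covariance: range (m^2) and bearing (rad^2)
    # variances; range noise scales with the measured distance z[0].
    return np.diag([(sigma_r * max(1.0, z[0])) ** 2, sigma_b ** 2])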
def update(self, observed, z_t, x_t, u_t=None):
    # Kalman filter update.
    self.ukf.predict(u=u_t)
    if observed:
        r_pred, alpha_pred, _ = util.relative_measure(self.ukf.x, x_t)
        self.ukf.update(z_t, R=self.obs_noise_func((r_pred, alpha_pred)),
                        agent_state=x_t)
    cov_new = self.ukf.P
    state_new = self.ukf.x
    if LA.det(cov_new) < 1e6:
        self.cov = cov_new
    if not self.collision_func(state_new[:2]):
        self.state = np.clip(state_new, self.limit[0], self.limit[1])
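# --- Sketch (assumption): one way `self.ukf` above could be built with
# filterpy, matching the call pattern predict(u=...) / update(z, R=...,
# agent_state=...). The constant-velocity fx and range-bearing hx are guesses
# consistent with the rest of the file, not the source's definitions.
import numpy as np
from filterpy.kalman import UnscentedKalmanFilter, MerweScaledSigmaPoints

def fx(x, dt, u=None):
    # Constant-velocity target model; u is unused but accepted so that
    # predict(u=...) can forward it as a keyword argument.
    A = np.array([[1, 0, dt, 0],
                  [0, 1, 0, dt],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=float)
    return A @ x

def hx(x, agent_state=None):
    # Range-bearing measurement of the target position from the agent pose.
    diff = x[:2] - agent_state[:2]
    r = np.linalg.norm(diff)
    alpha = np.arctan2(diff[1], diff[0]) - agent_state[2]
    return np.array([r, (alpha + np.pi) % (2 * np.pi) - np.pi])

points = MerweScaledSigmaPoints(n=4, alpha=0.1, beta=2.0, kappa=0.0)
ukf = UnscentedKalmanFilter(
    dim_x=4, dim_z=2, dt=0.5, hx=hx, fx=fx, points=points,
    # Wrap the bearing component of the innovation into [-pi, pi).
    residual_z=lambda a, b: np.array(
        [a[0] - b[0], (a[1] - b[1] + np.pi) % (2 * np.pi) - np.pi]))
ukf.x = np.zeros(4)          # initial belief mean (placeholder)
ukf.P = np.eye(4) * 10.0     # initial belief covariance (placeholder)
ukf.Q = np.eye(4) * 0.01     # process noise (placeholder)

# Keyword arguments flow through to fx/hx exactly as in the update() above.
ukf.predict(u=None)
ukf.update(np.array([5.0, 0.1]), R=np.diag([0.04, 1e-4]),
           agent_state=np.array([0.0, 0.0, 0.0]))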
def reset(self, init_random=True):
    self.state = []
    if init_random:
        if self.MAP.map is None:
            a_init = self.agent_init_pos[:2]
            self.agent.reset(self.agent_init_pos)
        else:
            isvalid = False
            while not isvalid:
                a_init = (np.random.random((2,)) *
                          (self.MAP.mapmax - self.MAP.mapmin) + self.MAP.mapmin)
                isvalid = not map_utils.is_collision(self.MAP, a_init,
                                                     MARGIN2WALL)
            self.agent.reset([a_init[0], a_init[1],
                              np.random.random() * 2 * np.pi - np.pi])
        for i in range(self.num_targets):
            isvalid = False
            while not isvalid:
                rand_ang = np.random.rand() * 2 * np.pi - np.pi
                t_r = INIT_DISTANCE
                t_init = np.array([t_r * np.cos(rand_ang),
                                   t_r * np.sin(rand_ang)]) + a_init
                if np.sqrt(np.sum((t_init - a_init) ** 2)) < MARGIN:
                    isvalid = False
                else:
                    isvalid = not map_utils.is_collision(self.MAP, t_init,
                                                         MARGIN2WALL)
            self.belief_targets[i].reset(
                init_state=np.concatenate(
                    (t_init + 10 * (np.random.rand(2) - 0.5), np.zeros(2))),
                init_cov=self.target_init_cov)
            self.targets[i].reset(
                np.concatenate((t_init, self.target_init_vel)))
            r, alpha, _ = util.relative_measure(self.belief_targets[i].state,
                                                self.agent.state)
            logdetcov = np.log(LA.det(self.belief_targets[i].cov))
            self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
    self.state.extend([self.sensor_r, np.pi])
    self.state = np.array(self.state)
    self.local_map = map_utils.local_map(self.MAP, self.im_size,
                                         self.agent.state)
    return np.concatenate((self.local_map.flatten(), self.state))
def step(self, action):
    self.agent.update(action, self.targets.state)
    # Update the true target state.
    self.targets.update()
    # Observe.
    measurements = self.agent.observation(self.targets.target)
    obstacles_pt = map_utils.get_cloest_obstacle(self.MAP, self.agent.state)
    # Update the belief of the agent on the target using KF.
    GaussianBelief = IGL.MultiTargetFilter(measurements, self.agent.agent,
                                           debug=False)
    self.agent.update_belief(GaussianBelief)
    self.belief_targets.update(self.agent.get_belief_state(),
                               self.agent.get_belief_cov())
    observed = [m.validity for m in measurements]
    reward, done, test_reward = self.get_reward(obstacles_pt, observed,
                                                self.is_training)
    if obstacles_pt is None:
        obstacles_pt = (self.sensor_r, np.pi)
    self.state = []
    target_b_state = self.agent.get_belief_state()
    target_b_cov = self.agent.get_belief_cov()
    for n in range(self.num_targets):
        r_b, alpha_b, _ = util.relative_measure(
            target_b_state[self.target_dim * n:self.target_dim * n + 2],
            self.agent.state)
        # Believed velocity of target n, rotated into the agent's body frame.
        rel_target_vel = util.coord_change2b(
            target_b_state[self.target_dim * n + 2:self.target_dim * n + 4],
            alpha_b + self.agent.state[-1])
        self.state.extend([
            r_b, alpha_b, rel_target_vel[0], rel_target_vel[1],
            np.log(LA.det(
                target_b_cov[self.target_dim * n:self.target_dim * (n + 1),
                             self.target_dim * n:self.target_dim * (n + 1)])),
            float(observed[n])
        ])
    self.state.extend([obstacles_pt[0], obstacles_pt[1]])
    self.state = np.array(self.state)
    return self.state, reward, done, {'test_reward': test_reward}
def update(self, observed, z_t, x_t):
    # Kalman filter prediction and update.
    # Prediction.
    state_predicted = np.matmul(self.A, self.state)
    cov_predicted = np.matmul(np.matmul(self.A, self.cov), self.A.T) + self.W
    # Update.
    if observed:
        r_pred, alpha_pred, diff_pred = util.relative_measure(state_predicted,
                                                              x_t)
        # Jacobian of the range-bearing measurement h(x) = [r, alpha] with
        # respect to the target position (velocity columns are zero).
        if self.dim == 2:
            Hmat = np.array(
                [[diff_pred[0], diff_pred[1]],
                 [-diff_pred[1] / r_pred, diff_pred[0] / r_pred]]) / r_pred
        elif self.dim == 4:
            Hmat = np.array(
                [[diff_pred[0], diff_pred[1], 0.0, 0.0],
                 [-diff_pred[1] / r_pred, diff_pred[0] / r_pred,
                  0.0, 0.0]]) / r_pred
        else:
            raise ValueError('target dimension for KF must be either 2 or 4')
        innov = z_t - np.array([r_pred, alpha_pred])
        innov[1] = util.wrap_around(innov[1])
        # Innovation covariance S = H P H^T + R.
        S = np.matmul(np.matmul(Hmat, cov_predicted), Hmat.T) \
            + self.obs_noise_func((r_pred, alpha_pred))
        K = np.matmul(np.matmul(cov_predicted, Hmat.T), LA.inv(S))
        C = np.eye(self.dim) - np.matmul(K, Hmat)
        cov_new = np.matmul(C, cov_predicted)
        state_new = state_predicted + np.matmul(K, innov)
    else:
        cov_new = cov_predicted
        state_new = state_predicted
    if LA.det(cov_new) < 1e6:
        self.cov = cov_new
    if not self.collision_func(state_new[:2]):
        self.state = np.clip(state_new, self.limit[0], self.limit[1])
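# --- Verification aid (not from the source): the Hmat above is the analytic
# Jacobian of h = [r, alpha] w.r.t. the target position, evaluated at the
# prediction. A finite-difference check confirms the two rows:
import numpy as np

def h(target_xy, agent_xy, agent_theta):
    diff = target_xy - agent_xy
    return np.array([np.linalg.norm(diff),
                     np.arctan2(diff[1], diff[0]) - agent_theta])

x_t = np.array([1.0, 2.0, 0.3])   # agent pose [x, y, theta]
t = np.array([4.0, 6.0])          # target position
diff = t - x_t[:2]
r = np.linalg.norm(diff)
H_analytic = np.array([[diff[0], diff[1]],
                       [-diff[1] / r, diff[0] / r]]) / r
eps = 1e-6
H_numeric = np.column_stack([
    (h(t + eps * e, x_t[:2], x_t[2]) - h(t, x_t[:2], x_t[2])) / eps
    for e in np.eye(2)])
assert np.allclose(H_analytic, H_numeric, atol=1e-5)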
def reset(self, init_random=True):
    if init_random:
        isvalid = False
        while not isvalid:
            if self.MAP.map is None:
                a_init = self.agent_init_pos[:2]
            else:
                a_init = (np.random.random((2,)) *
                          (self.MAP.mapmax - self.MAP.mapmin) + self.MAP.mapmin)
            if not map_utils.is_collision(self.MAP, a_init, MARGIN2WALL):
                rand_ang = np.random.rand() * 2 * np.pi - np.pi
                t_r = INIT_DISTANCE
                t_init = np.array([t_r * np.cos(rand_ang),
                                   t_r * np.sin(rand_ang)]) + a_init
                if np.sqrt(np.sum((t_init - a_init) ** 2)) < MARGIN:
                    isvalid = False
                else:
                    isvalid = not map_utils.is_collision(self.MAP, t_init,
                                                         MARGIN2WALL)
        a_init = IGL.SE3Pose(
            np.concatenate((a_init, [np.random.random() * 2 * np.pi - np.pi])),
            np.array([0, 0, 0, 1]))
    else:
        a_init = IGL.SE3Pose(self.agent_init_pos, np.array([0, 0, 0, 1]))
        t_init = self.target_init_pos
    # Build a target.
    target = self.cfg.setup_se2_targets(
        n_targets=self.num_targets,
        init_odom=[np.concatenate((t_init,
                                   [np.random.rand() * 2 * np.pi - np.pi]))],
        q=self.q,
        policy=Policy.random_policy(0.5, 60))
        # collision_func=lambda x: map_utils.is_collision(self.MAP, x,
        #                                                 MARGIN2WALL)
    # Integrator ground-truth model.
    belief_target = self.cfg.setup_integrator_belief(
        n_targets=self.num_targets,
        q=self.q,
        init_pos=[t_init + 10. * (np.random.rand(2) - 0.5)],
        cov_pos=self.target_init_cov,
        cov_vel=self.target_init_cov,
        init_vel=(0.0, 0.0))
    # Build a robot.
    self.agent.reset(a_init, belief_target)
    r, alpha, _ = util.relative_measure(self.agent.get_belief_state()[:2],
                                        self.agent.state)
    self.state = np.array([
        r, alpha, 0.0, 0.0,
        np.log(LA.det(self.agent.get_belief_cov())), 0.0,
        self.sensor_r, np.pi
    ])
    self.target.reset(target)
    self.belief_target.update(self.agent.get_belief_state(),
                              self.agent.get_belief_cov())
    return np.array(self.state)
def reset(self, init_random=True):
    self.state = []
    t_init_sets = []
    t_init_b_sets = []
    if init_random:
        if self.MAP.map is None:
            a_init = self.agent_init_pos
            a_init_igl = IGL.SE3Pose(self.agent_init_pos,
                                     np.array([0, 0, 0, 1]))
        else:
            isvalid = False
            while not isvalid:
                a_init = (np.random.random((2,)) *
                          ((self.MAP.mapmax - MARGIN) - self.MAP.mapmin)
                          + self.MAP.mapmin - .5 * MARGIN)
                isvalid = not map_utils.is_collision(self.MAP, a_init,
                                                     MARGIN2WALL)
            a_init = np.concatenate(
                (a_init, [np.random.random() * 2 * np.pi - np.pi]))
            a_init_igl = IGL.SE3Pose(a_init, np.array([0, 0, 0, 1]))
        for i in range(self.num_targets):
            isvalid = False
            while not isvalid:
                rand_ang = np.random.rand() * 2 * np.pi - np.pi
                t_r = INIT_DISTANCE
                t_init = np.array([t_r * np.cos(rand_ang),
                                   t_r * np.sin(rand_ang)]) + a_init[:2]
                if np.sqrt(np.sum((t_init - a_init[:2]) ** 2)) < MARGIN:
                    isvalid = False
                else:
                    isvalid = not map_utils.is_collision(self.MAP, t_init,
                                                         MARGIN2WALL)
            t_init_b_sets.append(t_init + 10 * (np.random.rand(2) - 0.5))
            t_init_sets.append(t_init)
            r, alpha, _ = util.relative_measure(t_init_b_sets[-1][:2], a_init)
            logdetcov = np.log(LA.det(self.target_init_cov *
                                      np.eye(self.target_dim)))
            self.state.extend([r, alpha, 0.0, 0.0, logdetcov, 0.0])
    self.state.extend([self.sensor_r, np.pi])
    self.state = np.array(self.state)
    # Build a target.
    target = self.cfg.setup_integrator_targets(
        n_targets=self.num_targets,
        init_pos=t_init_sets,
        init_vel=self.target_init_vel,
        q=self.q,
        max_vel=self.vel_limit[0])
    # Integrator ground-truth model.
    belief_target = self.cfg.setup_integrator_belief(
        n_targets=self.num_targets,
        q=self.q,
        init_pos=t_init_b_sets,
        cov_pos=self.target_init_cov,
        cov_vel=self.target_init_cov,
        init_vel=(0.0, 0.0))
    # Build a robot.
    self.agent.reset(a_init_igl, belief_target)
    self.targets.reset(target)
    self.belief_targets.update(self.agent.get_belief_state(),
                               self.agent.get_belief_cov())
    return np.array(self.state)
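# --- Usage sketch (assumption): these methods belong to Gym-style environment
# classes. The class name below is hypothetical and only illustrates the
# reset()/step() contract seen above.
env = TargetTrackingEnv()                  # hypothetical constructor
obs = env.reset(init_random=True)
done = False
while not done:
    action = env.action_space.sample()     # assumes a gym.spaces action space
    obs, reward, done, info = env.step(action)
    print(reward, info['test_reward'])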