def reset(self):
    """Reset every agent onto the current formation goal and return the
    initial observation.

    Side effects: re-poses all agents, refreshes ``self.goal`` and marks
    ``self.goal_changed`` so downstream consumers re-read the goal.
    """
    # One (x, y, theta) row per agent; the (3, -1) reshape only lines up
    # row-per-agent when there are exactly 3 agents -- TODO confirm against
    # get_form_goal()'s flat layout (reshape((-1, 3)) may be the intent).
    poses = self.get_form_goal().reshape((3, -1))
    # log.out(poses)
    # Plain loop instead of a side-effect-only list comprehension.
    for i, agent in enumerate(self.agents):
        agent.reset(Pose(*poses[i]))
    self.goal = self.get_form_goal()
    self.goal_changed = True
    return self.compute_obs()
def sample_pose(self, limits=None):
    """Draw a uniformly random Pose.

    (x, y) is uniform over a box of side ``limits`` (falling back to
    ``self.w_limits``) centered on the origin; theta is uniform in
    [-pi, pi).
    """
    span = self.w_limits if limits is None else limits
    x, y = random(2) * span - span / 2
    theta = (random() * 2 - 1) * np.pi
    return Pose(x=x, y=y, t=theta)
def __init__(self, id, pose=None,
             defaultPose=False, collisionRadius=0.15, lenTrajectory=100):
    """RL-controlled agent wrapping the base agent class.

    Args:
        id: agent identifier (name kept for interface compatibility even
            though it shadows the builtin).
        pose: initial Pose; a fresh Pose() is created when omitted.
            (Was ``pose=Pose()`` -- a mutable default evaluated once and
            shared by every instantiation; fixed with the None sentinel,
            matching the base-class __init__ convention.)
        defaultPose: False, or a Pose forwarded to the base class.
        collisionRadius: collision radius forwarded to the base class.
        lenTrajectory: trajectory buffer length forwarded to the base class.
    """
    self.type = "RLAgent"
    self.id = id
    self.edges = {}
    self.edgeControllers = {}
    if pose is None:
        pose = Pose()
    super(RLAgent, self).__init__(id, pose=pose, defaultPose=defaultPose,
                                  collisionRadius=collisionRadius,
                                  lenTrajectory=lenTrajectory)
def step(self, action):
    """Apply ``action`` for ``num_iter`` sub-steps and return the gym-style
    tuple (observation, reward, done, info)."""
    assert self.goal is not None, "Call reset before calling step"
    # Snapshot the pose before stepping so the observation can be
    # computed relative to it.
    prev_pose = Pose(*self.agent.pose.tolist())
    for _ in range(self.num_iter):
        self.agent.step(action)
    reward, done = self.get_reward()
    info = {"dist": self.current_distance, "is_success": done}
    return self.compute_obs(prev_pose), reward, done, info
def compute_reward(self, tol=0.15):
    """Compute the per-step reward for the formation task.

    Args:
        tol: per-side absolute tolerance for declaring the formation
            reached (was a hard-coded 0.15; default preserves behavior).

    Returns:
        (reward, done, info): step penalty until every sorted pairwise
        inter-agent distance is within ``tol`` of the goal sides, then
        ``max_reward`` with ``done=True``.
    """
    reward, done = -self.step_penalty, False
    # Each unordered agent pair once (i.id < j.id); sorted so the list can
    # be compared element-wise with the (sorted) goal side lengths.
    sides = sorted([Pose.dist(i.pose, j.pose)
                    for i in self.agents for j in self.agents if i.id < j.id])
    # log.out(sides)
    if (np.abs(sides - np.array(self.goal_sides)) < tol).all():
        error.out("\nHURRAY\n{}\n{}\n{}\n{}\n".format(
            self.goal_sides, sides, [i.pose for i in self.agents],
            sides - np.array(self.goal_sides)))
        reward, done = self.max_reward, True
    return reward, done, {"success": done}
def __init__(self, id, pose=None, defaultPose=False, collisionRadius=0.15):
    """Base agent.

    Args:
        id: agent identifier (name kept for interface compatibility).
        pose: initial Pose; a fresh Pose() is used when omitted.
        defaultPose: False, or a Pose stored as the reset-to pose.
        collisionRadius: collision radius for this agent.
    """
    assert defaultPose == False or isinstance(
        defaultPose, Pose), ERR.TYPE_MISMATCH(defaultPose, Pose)
    self.type = "AGENT"
    self.id = id
    # NOTE(review): when both pose and defaultPose are omitted this stores
    # deepcopy(None) i.e. None, because the Pose() fallback below runs
    # AFTER this line -- confirm that ordering is intentional.
    self.defaultPose = defaultPose if defaultPose else copy.deepcopy(pose)
    self.collisionRadius = collisionRadius
    # Was `pose == None`: identity test is the idiom and avoids routing
    # the comparison through Pose.__eq__.
    if pose is None:
        pose = Pose()
    self.reset(pose)
def sample_pose(self):
    """Uniform random Pose with each component in [-limit/2, limit/2),
    per-axis limits taken from ``self.limits``."""
    offsets = random(3) * self.limits - self.limits / 2
    return Pose(*offsets)
def randPose():
    """Random Pose whose three components are each uniform in [-k/2, k/2),
    where ``k`` is a module-level scale."""
    components = (np.random.random(3) * k - k / 2.).tolist()
    return Pose(*components)
def rnd(x=5):
    """Uniform random sample in [-x/2, x/2)."""
    return np.random.random() * x - x / 2.


def randomtarget():
    """Build a random two-point target shape with a shared random
    orientation for both points."""
    geometry = {
        'coordinates': [[rnd(), rnd()], [rnd(), rnd()]],
        'orientations': [rnd(2 * np.pi)] * 2,
    }
    # Py2->Py3 fix: map() now returns a one-shot iterator that would be
    # exhausted after a single pass over the coordinates; materialize the
    # arrays in a list instead.
    geometry['coordinates'] = [np.array(c) for c in geometry['coordinates']]
    return ShapeByGeometry(geometry)


############### AGENTS
# Assumes NUM_AGENTS <= len(agents_pose) (2) -- TODO confirm.
agents_pose = [Pose(0, 0), Pose(0, -2, -np.pi / 2)]
agents = [RLAgent(i, pose=agents_pose[i]) for i in range(NUM_AGENTS)]
eval_agents = [RLAgent(i, pose=agents_pose[i]) for i in range(NUM_AGENTS)]

############### ENVIRONMENT
env = FormationEnvironment(targetshape, agents,
                           num_iterations=HP.NUM_ITERATIONS, dt=HP.DT)
eval_env = FormationEnvironment(targetshape, eval_agents,
                                num_iterations=HP.NUM_ITERATIONS, dt=HP.DT)

############### PARTIALLY OBSERVED ENVS
agent_observed_envs = {}