import logging
import random

import gym
import numpy as np
from numpy.linalg import norm

# Module paths below are assumed from the usual CrowdNav-style project layout;
# adjust them to match this repository.
from crowd_sim.envs.utils.human import Human
from crowd_sim.envs.utils.robot import Robot
from crowd_sim.envs.utils.state import ObservableState, FullState, JointState
from crowd_sim.envs.utils.info import Timeout, Collision, ReachGoal, Danger, Nothing
from crowd_nav.policy.orca import ORCA
from crowd_nav.policy.policy_factory import policy_factory


class CrowdSim(gym.Env):
    def __init__(self):
        """
        Movement simulation for n+1 agents.
        An agent can either be a human or the robot.
        Humans are controlled by an unknown and fixed policy.
        The robot is controlled by a known and learnable policy.
        """
        self.time_limit = None
        self.time_step = None
        self.robot = None  # a Robot instance representing the robot
        self.humans = None  # a list of Human instances, representing all humans in the environment
        self.global_time = None

        # reward function
        self.success_reward = None
        self.collision_penalty = None
        self.discomfort_dist = None
        self.discomfort_dist_front = None
        self.discomfort_penalty_factor = None

        # simulation configuration
        self.config = None
        self.case_capacity = None
        self.case_size = None
        self.case_counter = None
        self.randomize_attributes = None
        self.circle_radius = None
        self.human_num = None

        self.action_space = None
        self.observation_space = None

        # limit FOV
        self.robot_fov = None
        self.human_fov = None

        self.dummy_human = None
        self.dummy_robot = None

        # seed
        self.thisSeed = None  # the seed will be set when the env is created

        # nenv
        self.nenv = None  # the number of envs will be set when the env is created.
        # Because the human crossing cases are controlled by random seed, we will calculate
        # a unique random seed for each parallel env.

        self.phase = None  # set the phase to be train, val or test
        self.test_case = None  # the test case ID, which will be used to calculate a seed to generate a human crossing case

        # for render
        self.render_axis = None

        self.humans = []

        self.potential = None

    def configure(self, config):
        self.config = config

        self.time_limit = config.env.time_limit
        self.time_step = config.env.time_step
        self.randomize_attributes = config.env.randomize_attributes

        self.success_reward = config.reward.success_reward
        self.collision_penalty = config.reward.collision_penalty
        self.discomfort_dist = config.reward.discomfort_dist_back
        self.discomfort_dist_front = config.reward.discomfort_dist_front
        self.discomfort_penalty_factor = config.reward.discomfort_penalty_factor

        if self.config.humans.policy == 'orca' or self.config.humans.policy == 'social_force':
            self.case_capacity = {'train': np.iinfo(np.uint32).max - 2000,
                                  'val': 1000, 'test': 1000}
            self.case_size = {'train': np.iinfo(np.uint32).max - 2000,
                              'val': self.config.env.val_size,
                              'test': self.config.env.test_size}
            self.circle_radius = config.sim.circle_radius
            self.human_num = config.sim.human_num
            self.group_human = config.sim.group_human
        else:
            raise NotImplementedError

        self.case_counter = {'train': 0, 'test': 0, 'val': 0}

        logging.info('human number: {}'.format(self.human_num))
        if self.randomize_attributes:
            logging.info("Randomizing humans' radius and preferred speed")
        else:
            logging.info("Not randomizing humans' radius and preferred speed")
        logging.info('Circle width: {}'.format(self.circle_radius))

        self.robot_fov = np.pi * config.robot.FOV
        self.human_fov = np.pi * config.humans.FOV
        logging.info('robot FOV %f', self.robot_fov)
        logging.info('humans FOV %f', self.human_fov)

        # set dummy human and dummy robot
        # dummy humans, used if any human is not in view of other agents
        self.dummy_human = Human(self.config, 'humans')
        # if a human is not in view, set its state to (px=7, py=7, gx=7, gy=7, vx=0, vy=0, theta=0)
        self.dummy_human.set(7, 7, 7, 7, 0, 0, 0)
        self.dummy_human.time_step = config.env.time_step

        self.dummy_robot = Robot(self.config, 'robot')
        self.dummy_robot.set(7, 7, 7, 7, 0, 0, 0)
        self.dummy_robot.time_step = config.env.time_step
        self.dummy_robot.kinematics = 'holonomic'
        self.dummy_robot.policy = ORCA(config)

        self.r = self.config.humans.radius

        # configure noise in state
        self.add_noise = config.noise.add_noise
        if self.add_noise:
            self.noise_type = config.noise.type
            self.noise_magnitude = config.noise.magnitude

        # configure randomized goal changing of humans midway through episode
        self.random_goal_changing = config.humans.random_goal_changing
        if self.random_goal_changing:
            self.goal_change_chance = config.humans.goal_change_chance

        # configure randomized goal changing of humans after reaching their respective goals
        self.end_goal_changing = config.humans.end_goal_changing
        if self.end_goal_changing:
            self.end_goal_change_chance = config.humans.end_goal_change_chance

        # configure randomized radii changing when reaching goals
        self.random_radii = config.humans.random_radii

        # configure randomized v_pref changing when reaching goals
        self.random_v_pref = config.humans.random_v_pref

        # configure randomized unobservability of humans
        self.random_unobservability = config.humans.random_unobservability
        if self.random_unobservability:
            self.unobservable_chance = config.humans.unobservable_chance

        self.last_human_states = np.zeros((self.human_num, 5))

        # configure randomized policy changing of humans every episode
        self.random_policy_changing = config.humans.random_policy_changing

        # set robot for this env
        rob_RL = Robot(config, 'robot')
        self.set_robot(rob_RL)

        return
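    # configure() expects an attribute-style config object (nested namespaces) exposing
    # the fields read above. A minimal sketch of such a config; the field names mirror the
    # attribute accesses in configure(), while the values here are purely illustrative:
    #
    #   from types import SimpleNamespace
    #   config = SimpleNamespace(
    #       env=SimpleNamespace(time_limit=50, time_step=0.25, randomize_attributes=False,
    #                           val_size=100, test_size=500),
    #       reward=SimpleNamespace(success_reward=1.0, collision_penalty=-0.25,
    #                              discomfort_dist_back=0.25, discomfort_dist_front=0.25,
    #                              discomfort_penalty_factor=0.5),
    #       sim=SimpleNamespace(circle_radius=4.0, human_num=5, group_human=False),
    #       robot=SimpleNamespace(FOV=2.0, radius=0.3),
    #       humans=SimpleNamespace(policy='orca', FOV=2.0, radius=0.3,
    #                              random_goal_changing=False, end_goal_changing=False,
    #                              random_radii=False, random_v_pref=False,
    #                              random_unobservability=False, random_policy_changing=False),
    #       noise=SimpleNamespace(add_noise=False),
    #   )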
    def set_robot(self, robot):
        raise NotImplementedError

    def generate_random_human_position(self, human_num):
        """
        Generate human position: generate start position on a circle, goal position is at the opposite side
        :param human_num:
        :return:
        """
        # initial min separation distance to avoid danger penalty at beginning
        for i in range(human_num):
            self.humans.append(self.generate_circle_crossing_human())

    # return a static human in env
    # position: (px, py) for fixed position, or None for random position
    def generate_circle_static_obstacle(self, position=None):
        # generate a human with radius = 0.3, v_pref = 1, visible = True, and policy = orca
        human = Human(self.config, 'humans')
        # For fixed position
        if position:
            px, py = position
        # For random position
        else:
            while True:
                angle = np.random.random() * np.pi * 2
                # add some noise to simulate all the possible cases robot could meet with human
                v_pref = 1.0 if human.v_pref == 0 else human.v_pref
                px_noise = (np.random.random() - 0.5) * v_pref
                py_noise = (np.random.random() - 0.5) * v_pref
                px = self.circle_radius * np.cos(angle) + px_noise
                py = self.circle_radius * np.sin(angle) + py_noise
                collide = False
                for agent in [self.robot] + self.humans:
                    min_dist = human.radius + agent.radius + self.discomfort_dist
                    if norm((px - agent.px, py - agent.py)) < min_dist or \
                            norm((px - agent.gx, py - agent.gy)) < min_dist:
                        collide = True
                        break
                if not collide:
                    break

        # make it a static obstacle
        # px, py, gx, gy, vx, vy, theta
        human.set(px, py, px, py, 0, 0, 0, v_pref=0)
        return human
    def generate_circle_crossing_human(self):
        human = Human(self.config, 'humans')
        if self.randomize_attributes:
            human.sample_random_attributes()

        while True:
            angle = np.random.random() * np.pi * 2
            # add some noise to simulate all the possible cases robot could meet with human
            v_pref = 1.0 if human.v_pref == 0 else human.v_pref
            px_noise = (np.random.random() - 0.5) * v_pref
            py_noise = (np.random.random() - 0.5) * v_pref
            px = self.circle_radius * np.cos(angle) + px_noise
            py = self.circle_radius * np.sin(angle) + py_noise
            collide = False

            if self.group_human:
                collide = self.check_collision_group((px, py), human.radius)
            else:
                for i, agent in enumerate([self.robot] + self.humans):
                    # keep human at least 3 meters away from robot
                    if self.robot.kinematics == 'unicycle' and i == 0:
                        # Todo: if circle_radius <= 4, it will get stuck here
                        min_dist = self.circle_radius / 2
                    else:
                        min_dist = human.radius + agent.radius + self.discomfort_dist
                    if norm((px - agent.px, py - agent.py)) < min_dist or \
                            norm((px - agent.gx, py - agent.gy)) < min_dist:
                        collide = True
                        break
            if not collide:
                break

        # px = np.random.uniform(-6, 6)
        # py = np.random.uniform(-3, 3.5)
        # human.set(px, py, px, py, 0, 0, 0)
        human.set(px, py, -px, -py, 0, 0, 0)
        return human

    # add noise according to env.config to state
    def apply_noise(self, ob):
        if isinstance(ob[0], ObservableState):
            for i in range(len(ob)):
                if self.noise_type == 'uniform':
                    noise = np.random.uniform(-self.noise_magnitude, self.noise_magnitude, 5)
                elif self.noise_type == 'gaussian':
                    noise = np.random.normal(size=5)
                else:
                    print('noise type not defined')
                    noise = np.zeros(5)
                # perturb each component of the observable state
                ob[i].px = ob[i].px + noise[0]
                ob[i].py = ob[i].py + noise[1]
                ob[i].vx = ob[i].vx + noise[2]
                ob[i].vy = ob[i].vy + noise[3]
                ob[i].radius = ob[i].radius + noise[4]
            return ob
        else:
            if self.noise_type == 'uniform':
                noise = np.random.uniform(-self.noise_magnitude, self.noise_magnitude, len(ob))
            elif self.noise_type == 'gaussian':
                noise = np.random.normal(size=len(ob))
            else:
                print('noise type not defined')
                noise = [0] * len(ob)
            return ob + noise
    # update the robot's belief of human states
    # if a human is visible, its state is updated to its current ground truth state
    # else we assume it keeps going in a straight line with the last observed velocity
    def update_last_human_states(self, human_visibility, reset):
        """
        update the self.last_human_states array
        human_visibility: list of booleans returned by get_human_in_fov (e.g. [T, F, F, T, F])
        reset: True if this function is called by reset, False if called by step
        :return:
        """
        # keep the order of 5 humans at each timestep
        for i in range(self.human_num):
            if human_visibility[i]:
                humanS = np.array(self.humans[i].get_observable_state_list())
                self.last_human_states[i, :] = humanS
            else:
                if reset:
                    humanS = np.array([15., 15., 0., 0., 0.3])
                    self.last_human_states[i, :] = humanS
                else:
                    px, py, vx, vy, r = self.last_human_states[i, :]
                    # Plan A: linear approximation of human's next position
                    px = px + vx * self.time_step
                    py = py + vy * self.time_step
                    self.last_human_states[i, :] = np.array([px, py, vx, vy, r])
                    # Plan B: assume the human doesn't move, use last observation
                    # self.last_human_states[i, :] = np.array([px, py, 0., 0., r])

    # return the ground truth locations of all humans
    def get_true_human_states(self):
        true_human_states = np.zeros((self.human_num, 2))
        for i in range(self.human_num):
            humanS = np.array(self.humans[i].get_observable_state_list())
            true_human_states[i, :] = humanS[:2]
        return true_human_states

    def randomize_human_policies(self):
        """
        Randomize the moving humans' policies to be either orca or social force
        """
        for human in self.humans:
            if not human.isObstacle:
                new_policy = random.choice(['orca', 'social_force'])
                new_policy = policy_factory[new_policy]()
                human.set_policy(new_policy)

    # Generates a group of circum_num humans in a circle formation at a random viable location
    def generate_circle_group_obstacle(self, circum_num):
        group_circumference = self.config.humans.radius * 2 * circum_num
        # print("group circum: ", group_circumference)
        group_radius = group_circumference / (2 * np.pi)
        # print("group radius: ", group_radius)
        while True:
            rand_cen_x = np.random.uniform(-3, 3)
            rand_cen_y = np.random.uniform(-3, 3)
            success = True
            for i, group in enumerate(self.circle_groups):
                # print(i)
                dist_between_groups = np.sqrt((rand_cen_x - group[1]) ** 2 + (rand_cen_y - group[2]) ** 2)
                sum_radius = group_radius + group[0] + 2 * self.config.humans.radius
                if dist_between_groups < sum_radius:
                    success = False
                    break
            if success:
                # print("------------\nsuccessfully found valid x: ", rand_cen_x, " y: ", rand_cen_y)
                break
        self.circle_groups.append((group_radius, rand_cen_x, rand_cen_y))
        # print("current groups:")
        # for i in self.circle_groups:
        #     print(i)

        arc = 2 * np.pi / circum_num
        for i in range(circum_num):
            angle = arc * i
            curr_x = rand_cen_x + group_radius * np.cos(angle)
            curr_y = rand_cen_y + group_radius * np.sin(angle)
            point = (curr_x, curr_y)
            # print("adding circle point: ", point)
            curr_human = self.generate_circle_static_obstacle(point)
            curr_human.isObstacle = True
            self.humans.append(curr_human)

        return

    # given an agent's xy location and radius, check whether it collides with all other humans,
    # including the circle groups and moving humans
    # return True if collision, False if no collision
    def check_collision_group(self, pos, radius):
        # check circle groups
        for r, x, y in self.circle_groups:
            # use 0.5 because it's the max radius of humans
            if np.linalg.norm([pos[0] - x, pos[1] - y]) <= r + radius + 2 * 0.5:
                return True

        # check moving humans
        for human in self.humans:
            if human.isObstacle:
                pass
            else:
                if np.linalg.norm([pos[0] - human.px, pos[1] - human.py]) <= human.radius + radius:
                    return True
        return False
    # check collision between robot goal position and circle groups
    def check_collision_group_goal(self, pos, radius):
        # print('check goal', len(self.circle_groups))
        collision = False
        # check circle groups
        for r, x, y in self.circle_groups:
            # print(np.linalg.norm([pos[0] - x, pos[1] - y]), r + radius + 4 * 0.5)
            # use 0.5 because it's the max radius of humans
            if np.linalg.norm([pos[0] - x, pos[1] - y]) <= r + radius + 4 * 0.5:
                collision = True
        return collision

    # set robot initial state and generate all humans for reset function
    # for crowd nav: human_num == self.human_num
    # for leader follower: human_num = self.human_num - 1
    def generate_robot_humans(self, phase, human_num=None):
        if human_num is None:
            human_num = self.human_num

        # for Group environment
        if self.group_human:
            # set the robot at a dummy faraway location to avoid collision with humans
            self.robot.set(10, 10, 10, 10, 0, 0, np.pi / 2)

            # generate humans
            self.circle_groups = []
            humans_left = human_num

            while humans_left > 0:
                # print("****************\nhumans left: ", humans_left)
                if humans_left <= 4:
                    # the same generator is used for all phases
                    self.generate_random_human_position(human_num=humans_left)
                    humans_left = 0
                else:
                    if humans_left < 10:
                        max_rand = humans_left
                    else:
                        max_rand = 10
                    # print("randint from 4 to ", max_rand)
                    circum_num = np.random.randint(4, max_rand)
                    # print("circum num: ", circum_num)
                    self.generate_circle_group_obstacle(circum_num)
                    humans_left -= circum_num

            # randomize starting position and goal position while keeping the distance between them > 6
            # set the robot on a circle with radius 5.5 randomly
            rand_angle = np.random.uniform(0, np.pi * 2)
            # print('rand angle:', rand_angle)
            increment_angle = 0.0
            while True:
                px_r = np.cos(rand_angle + increment_angle) * 5.5
                py_r = np.sin(rand_angle + increment_angle) * 5.5
                # check whether the initial px and py collide with any human
                collision = self.check_collision_group((px_r, py_r), self.robot.radius)
                # if the start position does not fall into any human group, it is okay; otherwise keep searching
                if not collision:
                    # print('initial pos angle:', rand_angle + increment_angle)
                    break
                increment_angle = increment_angle + 0.2

            increment_angle = increment_angle + np.pi  # start at the opposite side of the circle
            while True:
                gx = np.cos(rand_angle + increment_angle) * 5.5
                gy = np.sin(rand_angle + increment_angle) * 5.5
                # check whether the goal is inside the human groups
                collision = self.check_collision_group_goal((gx, gy), self.robot.radius)
                # if the robot goal does not fall into any human group, the goal is okay; otherwise keep searching
                if not collision:
                    # print('goal pos angle:', rand_angle + increment_angle)
                    break
                increment_angle = increment_angle + 0.2

            self.robot.set(px_r, py_r, gx, gy, 0, 0, np.pi / 2)

        # for FoV environment
        else:
            if self.robot.kinematics == 'unicycle':
                angle = np.random.uniform(0, np.pi * 2)
                px = self.circle_radius * np.cos(angle)
                py = self.circle_radius * np.sin(angle)
                while True:
                    gx, gy = np.random.uniform(-self.circle_radius, self.circle_radius, 2)
                    if np.linalg.norm([px - gx, py - gy]) >= 6:  # 1 was 6
                        break
                # randomize initial orientation
                self.robot.set(px, py, gx, gy, 0, 0, np.random.uniform(0, 2 * np.pi))

            # randomize starting position and goal position
            else:
                while True:
                    px, py, gx, gy = np.random.uniform(-self.circle_radius, self.circle_radius, 4)
                    if np.linalg.norm([px - gx, py - gy]) >= 6:
                        break
                self.robot.set(px, py, gx, gy, 0, 0, np.pi / 2)

        # generate humans
        self.generate_random_human_position(human_num=human_num)
    def reset(self, phase='train', test_case=None):
        """
        Set px, py, gx, gy, vx, vy, theta for robot and humans
        :return:
        """
        if self.phase is not None:
            phase = self.phase
        if self.test_case is not None:
            test_case = self.test_case

        if self.robot is None:
            raise AttributeError('robot has to be set!')
        assert phase in ['train', 'val', 'test']
        if test_case is not None:
            # test case is passed in to calculate a specific seed to generate the case
            self.case_counter[phase] = test_case
        self.global_time = 0

        self.humans = []
        # train, val, and test phases should start with different seeds.
        # case capacity: the maximum number for train (max possible int - 2000), val (1000), and test (1000)
        # val starts from seed=0, test starts from seed=case_capacity['val']=1000
        # train starts from self.case_capacity['val'] + self.case_capacity['test']=2000
        counter_offset = {'train': self.case_capacity['val'] + self.case_capacity['test'],
                          'val': 0,
                          'test': self.case_capacity['val']}

        np.random.seed(counter_offset[phase] + self.case_counter[phase] + self.thisSeed)

        self.generate_robot_humans(phase)

        # If configured to randomize human policies, do so
        if self.random_policy_changing:
            self.randomize_human_policies()

        for agent in [self.robot] + self.humans:
            agent.time_step = self.time_step
            agent.policy.time_step = self.time_step

        # case size is used to make sure that the case_counter is always between 0 and case_size[phase]
        self.case_counter[phase] = (self.case_counter[phase] + int(1 * self.nenv)) % self.case_size[phase]

        # get current observation
        ob = self.generate_ob(reset=True)

        # initialize potential
        self.potential = -abs(np.linalg.norm(np.array([self.robot.px, self.robot.py]) -
                                             np.array([self.robot.gx, self.robot.gy])))

        return ob

    # Update the humans' end goals in the environment
    # Produces valid end goals for each human
    def update_human_goals_randomly(self):
        # Update humans' goals randomly
        for human in self.humans:
            if human.isObstacle or human.v_pref == 0:
                continue
            if np.random.random() <= self.goal_change_chance:
                if not self.group_human:  # to improve the runtime
                    humans_copy = []
                    for h in self.humans:
                        if h != human:
                            humans_copy.append(h)

                # Produce a valid goal for the human in the circle setting
                while True:
                    angle = np.random.random() * np.pi * 2
                    # add some noise to simulate all the possible cases robot could meet with human
                    v_pref = 1.0 if human.v_pref == 0 else human.v_pref
                    gx_noise = (np.random.random() - 0.5) * v_pref
                    gy_noise = (np.random.random() - 0.5) * v_pref
                    gx = self.circle_radius * np.cos(angle) + gx_noise
                    gy = self.circle_radius * np.sin(angle) + gy_noise
                    collide = False
                    if self.group_human:
                        collide = self.check_collision_group((gx, gy), human.radius)
                    else:
                        for agent in [self.robot] + humans_copy:
                            min_dist = human.radius + agent.radius + self.discomfort_dist
                            if norm((gx - agent.px, gy - agent.py)) < min_dist or \
                                    norm((gx - agent.gx, gy - agent.gy)) < min_dist:
                                collide = True
                                break
                    if not collide:
                        break

                # Give human new goal
                human.gx = gx
                human.gy = gy
        return
    # Update the specified human's end goal in the environment randomly
    def update_human_goal(self, human):
        # Update human's goal randomly
        if np.random.random() <= self.end_goal_change_chance:
            if not self.group_human:
                humans_copy = []
                for h in self.humans:
                    if h != human:
                        humans_copy.append(h)

            # Update human's radius now that it has reached its goal
            if self.random_radii:
                human.radius += np.random.uniform(-0.1, 0.1)

            # Update human's v_pref now that it has reached its goal
            if self.random_v_pref:
                human.v_pref += np.random.uniform(-0.1, 0.1)

            while True:
                angle = np.random.random() * np.pi * 2
                # add some noise to simulate all the possible cases robot could meet with human
                v_pref = 1.0 if human.v_pref == 0 else human.v_pref
                gx_noise = (np.random.random() - 0.5) * v_pref
                gy_noise = (np.random.random() - 0.5) * v_pref
                gx = self.circle_radius * np.cos(angle) + gx_noise
                gy = self.circle_radius * np.sin(angle) + gy_noise
                collide = False
                if self.group_human:
                    collide = self.check_collision_group((gx, gy), human.radius)
                else:
                    for agent in [self.robot] + humans_copy:
                        min_dist = human.radius + agent.radius + self.discomfort_dist
                        if norm((gx - agent.px, gy - agent.py)) < min_dist or \
                                norm((gx - agent.gx, gy - agent.gy)) < min_dist:
                            collide = True
                            break
                if not collide:
                    break

            # Give human new goal
            human.gx = gx
            human.gy = gy
        return

    # Calculate whether agent2 is in agent1's FOV
    # Note this is not symmetric: agent2 being in agent1's FOV does not imply agent1 is in agent2's FOV!
    # arguments:
    # state1, state2: can be an agent instance OR a state instance
    # robot1: True if state1 is the robot, else False
    # return value:
    # return True if state2 is visible to state1, else return False
    def detect_visible(self, state1, state2, robot1=False, custom_fov=None):
        if self.robot.kinematics == 'holonomic':
            real_theta = np.arctan2(state1.vy, state1.vx)
        else:
            real_theta = state1.theta
        # angle of the center line of agent1's FOV
        v_fov = [np.cos(real_theta), np.sin(real_theta)]

        # vector from agent1 to agent2
        v_12 = [state2.px - state1.px, state2.py - state1.py]
        # angle between the center of the FOV and agent2
        v_fov = v_fov / np.linalg.norm(v_fov)
        v_12 = v_12 / np.linalg.norm(v_12)
        offset = np.arccos(np.clip(np.dot(v_fov, v_12), a_min=-1, a_max=1))
        if custom_fov:
            fov = custom_fov
        else:
            if robot1:
                fov = self.robot_fov
            else:
                fov = self.human_fov

        if np.abs(offset) <= fov / 2:
            return True
        else:
            return False
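    # Worked example for the FOV check above (illustrative values, not from any config):
    # suppose agent1 is at the origin heading along +x (real_theta = 0) and agent2 is at
    # (1, 1). Then v_fov = (1, 0), v_12 = (1, 1)/sqrt(2), and
    # offset = arccos(1/sqrt(2)) = pi/4. With config.robot.FOV = 1.0, fov = pi, so
    # offset = pi/4 <= pi/2 and agent2 is visible; with config.robot.FOV = 0.25,
    # fov = pi/4, so offset = pi/4 > pi/8 and agent2 is not visible.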
    # for robot:
    # return only the humans visible to the robot, the number of visible humans,
    # and a visibility mask over human ids (0 to human_num - 1)
    def get_num_human_in_fov(self):
        human_ids = []
        humans_in_view = []
        num_humans_in_view = 0

        for i in range(self.human_num):
            visible = self.detect_visible(self.robot, self.humans[i], robot1=True)
            if visible:
                humans_in_view.append(self.humans[i])
                num_humans_in_view = num_humans_in_view + 1
                human_ids.append(True)
            else:
                human_ids.append(False)

        return humans_in_view, num_humans_in_view, human_ids

    # convert an np array with length = 34 to a JointState object
    def array_to_jointstate(self, obs_list):
        fullstate = FullState(obs_list[0], obs_list[1], obs_list[2], obs_list[3],
                              obs_list[4], obs_list[5], obs_list[6], obs_list[7], obs_list[8])

        observable_states = []
        for k in range(self.human_num):
            idx = 9 + k * 5
            observable_states.append(ObservableState(obs_list[idx], obs_list[idx + 1],
                                                     obs_list[idx + 2], obs_list[idx + 3],
                                                     obs_list[idx + 4]))
        state = JointState(fullstate, observable_states)
        return state

    def last_human_states_obj(self):
        """
        convert self.last_human_states to a list of observable state objects for old algorithms to use
        """
        humans = []
        for i in range(self.human_num):
            h = ObservableState(*self.last_human_states[i])
            humans.append(h)
        return humans

    # find R(s, a)
    def calc_reward(self, action):
        # collision detection
        dmin = float('inf')

        danger_dists = []
        collision = False

        for i, human in enumerate(self.humans):
            dx = human.px - self.robot.px
            dy = human.py - self.robot.py
            closest_dist = (dx ** 2 + dy ** 2) ** (1 / 2) - human.radius - self.robot.radius

            if closest_dist < self.discomfort_dist:
                danger_dists.append(closest_dist)
            if closest_dist < 0:
                collision = True
                # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # check if reaching the goal
        reaching_goal = norm(np.array(self.robot.get_position()) -
                             np.array(self.robot.get_goal_position())) < self.robot.radius

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            episode_info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            episode_info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            episode_info = ReachGoal()
        elif dmin < self.discomfort_dist:
            # only penalize the agent for getting too close if the human is visible
            # adjust the reward based on FPS
            # print(dmin)
            reward = (dmin - self.discomfort_dist) * self.discomfort_penalty_factor * self.time_step
            done = False
            episode_info = Danger(dmin)
        else:
            # potential reward
            potential_cur = np.linalg.norm(np.array([self.robot.px, self.robot.py]) -
                                           np.array(self.robot.get_goal_position()))
            reward = 2 * (-abs(potential_cur) - self.potential)
            self.potential = -abs(potential_cur)

            done = False
            episode_info = Nothing()

        # if the robot is near collision/arrival, it should be able to turn a large angle
        if self.robot.kinematics == 'unicycle':
            # add a rotational penalty
            # if action.r is w, factor = -0.02 if w in [-1.5, 1.5], factor = -0.045 if w in [-1, 1];
            # if action.r is delta theta, factor = -2 if r in [-0.15, 0.15], factor = -4.5 if r in [-0.1, 0.1]
            r_spin = -5 * action.r ** 2

            # add a penalty for going backwards
            if action.v < 0:
                r_back = -2 * abs(action.v)
            else:
                r_back = 0.

            reward = reward + r_spin + r_back

        return reward, done, episode_info
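    # The potential term in calc_reward is a shaping reward on progress toward the goal:
    # self.potential stores -(distance to goal) from the previous step, so
    # reward = 2 * (-d_cur - (-d_prev)) = 2 * (d_prev - d_cur). With illustrative numbers,
    # if the robot moves from 5.0 m to 4.8 m away from the goal, the step reward is
    # 2 * (5.0 - 4.8) = 0.4; moving away from the goal yields a negative reward.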
    # compute the observation
    def generate_ob(self, reset):
        visible_human_states, num_visible_humans, human_visibility = self.get_num_human_in_fov()
        self.update_last_human_states(human_visibility, reset=reset)
        if self.robot.policy.name in ['lstm_ppo', 'srnn']:
            ob = [num_visible_humans]
            # append robot's state
            robotS = np.array(self.robot.get_full_state_list())
            ob.extend(list(robotS))

            ob.extend(list(np.ravel(self.last_human_states)))
            ob = np.array(ob)

        else:  # for orca and sf
            ob = self.last_human_states_obj()

        if self.add_noise:
            ob = self.apply_noise(ob)

        return ob

    def step(self, action, update=True):
        """
        Compute actions for all agents, detect collisions, update the environment and return (ob, reward, done, info)
        """
        # clip the action to obey the robot's constraints
        action = self.robot.policy.clip_action(action, self.robot.v_pref)

        # step all humans
        human_actions = []  # a list of all humans' actions
        for i, human in enumerate(self.humans):
            # observation for humans is always coordinates
            ob = []
            for other_human in self.humans:
                if other_human != human:
                    # Chance for one human to be blind to some other humans
                    if self.random_unobservability and i == 0:
                        if np.random.random() <= self.unobservable_chance or \
                                not self.detect_visible(human, other_human):
                            ob.append(self.dummy_human.get_observable_state())
                        else:
                            ob.append(other_human.get_observable_state())
                    # Else detectable humans are always observable to each other
                    elif self.detect_visible(human, other_human):
                        ob.append(other_human.get_observable_state())
                    else:
                        ob.append(self.dummy_human.get_observable_state())

            if self.robot.visible:
                # Chance for one human to be blind to the robot
                if self.random_unobservability and i == 0:
                    if np.random.random() <= self.unobservable_chance or \
                            not self.detect_visible(human, self.robot):
                        ob += [self.dummy_robot.get_observable_state()]
                    else:
                        ob += [self.robot.get_observable_state()]
                # Else humans will always see visible robots
                elif self.detect_visible(human, self.robot):
                    ob += [self.robot.get_observable_state()]
                else:
                    ob += [self.dummy_robot.get_observable_state()]

            human_actions.append(human.act(ob))

        # compute reward and episode info
        reward, done, episode_info = self.calc_reward(action)

        # apply action and update all agents
        self.robot.step(action)
        for i, human_action in enumerate(human_actions):
            self.humans[i].step(human_action)
        self.global_time += self.time_step  # max episode length = time_limit / time_step

        # compute the observation
        ob = self.generate_ob(reset=False)

        if self.robot.policy.name in ['srnn']:
            info = {'info': episode_info}
        else:  # for orca and sf
            info = episode_info

        # Update all humans' goals randomly midway through the episode
        if self.random_goal_changing:
            if self.global_time % 5 == 0:
                self.update_human_goals_randomly()

        # Update a specific human's goal once it has reached its original goal
        if self.end_goal_changing:
            for human in self.humans:
                if not human.isObstacle and human.v_pref != 0 and \
                        norm((human.gx - human.px, human.gy - human.py)) < human.radius:
                    self.update_human_goal(human)

        return ob, reward, done, info
    def render(self, mode='human'):
        import matplotlib.pyplot as plt
        import matplotlib.lines as mlines
        from matplotlib import patches

        plt.rcParams['animation.ffmpeg_path'] = '/usr/bin/ffmpeg'

        robot_color = 'yellow'
        goal_color = 'red'
        arrow_color = 'red'
        arrow_style = patches.ArrowStyle("->", head_length=4, head_width=2)

        def calcFOVLineEndPoint(ang, point, extendFactor):
            # choose the extendFactor big enough
            # so that the endPoints of the FOVLine are outside the xlim and ylim of the figure
            FOVLineRot = np.array([[np.cos(ang), -np.sin(ang), 0],
                                   [np.sin(ang), np.cos(ang), 0],
                                   [0, 0, 1]])
            point.extend([1])
            # apply rotation matrix
            newPoint = np.matmul(FOVLineRot, np.reshape(point, [3, 1]))
            # increase the distance between the line start point and the end point
            newPoint = [extendFactor * newPoint[0, 0], extendFactor * newPoint[1, 0], 1]
            return newPoint

        ax = self.render_axis
        artists = []

        # add goal
        goal = mlines.Line2D([self.robot.gx], [self.robot.gy], color=goal_color,
                             marker='*', linestyle='None', markersize=15, label='Goal')
        ax.add_artist(goal)
        artists.append(goal)

        # add robot
        robotX, robotY = self.robot.get_position()
        robot = plt.Circle((robotX, robotY), self.robot.radius, fill=True, color=robot_color)
        ax.add_artist(robot)
        artists.append(robot)

        plt.legend([robot, goal], ['Robot', 'Goal'], fontsize=16)

        # compute orientation in each step and add arrows to show the direction
        radius = self.robot.radius
        arrowStartEnd = []

        robot_theta = self.robot.theta if self.robot.kinematics == 'unicycle' \
            else np.arctan2(self.robot.vy, self.robot.vx)
        arrowStartEnd.append(((robotX, robotY),
                              (robotX + radius * np.cos(robot_theta),
                               robotY + radius * np.sin(robot_theta))))

        for i, human in enumerate(self.humans):
            theta = np.arctan2(human.vy, human.vx)
            arrowStartEnd.append(((human.px, human.py),
                                  (human.px + radius * np.cos(theta),
                                   human.py + radius * np.sin(theta))))

        arrows = [patches.FancyArrowPatch(*arrow, color=arrow_color, arrowstyle=arrow_style)
                  for arrow in arrowStartEnd]
        for arrow in arrows:
            ax.add_artist(arrow)
            artists.append(arrow)

        # draw FOV for the robot
        # add robot FOV
        FOVAng = self.robot_fov / 2
        FOVLine1 = mlines.Line2D([0, 0], [0, 0], linestyle='--')
        FOVLine2 = mlines.Line2D([0, 0], [0, 0], linestyle='--')

        startPointX = robotX
        startPointY = robotY
        endPointX = robotX + radius * np.cos(robot_theta)
        endPointY = robotY + radius * np.sin(robot_theta)

        # transform the vector back to the world frame origin, apply the rotation matrix, and get the end point of FOVLine
        # the start point of the FOVLine is the center of the robot
        FOVEndPoint1 = calcFOVLineEndPoint(FOVAng, [endPointX - startPointX, endPointY - startPointY],
                                           20. / self.robot.radius)
        FOVLine1.set_xdata(np.array([startPointX, startPointX + FOVEndPoint1[0]]))
        FOVLine1.set_ydata(np.array([startPointY, startPointY + FOVEndPoint1[1]]))

        FOVEndPoint2 = calcFOVLineEndPoint(-FOVAng, [endPointX - startPointX, endPointY - startPointY],
                                           20. / self.robot.radius)
        FOVLine2.set_xdata(np.array([startPointX, startPointX + FOVEndPoint2[0]]))
        FOVLine2.set_ydata(np.array([startPointY, startPointY + FOVEndPoint2[1]]))

        ax.add_artist(FOVLine1)
        ax.add_artist(FOVLine2)
        artists.append(FOVLine1)
        artists.append(FOVLine2)

        # add humans and change their color based on visibility
        human_circles = [plt.Circle(human.get_position(), human.radius, fill=False)
                         for human in self.humans]

        for i in range(len(self.humans)):
            ax.add_artist(human_circles[i])
            artists.append(human_circles[i])

            # green: visible; red: invisible
            if self.detect_visible(self.robot, self.humans[i], robot1=True):
                human_circles[i].set_color(c='g')
            else:
                human_circles[i].set_color(c='r')

        plt.pause(0.1)
        for item in artists:
            item.remove()  # there should be a better way to do this
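# A minimal rollout sketch for this environment. CrowdSim.set_robot() is abstract, so this
# assumes a subclass (here called CrowdSimVarNum purely for illustration) that implements
# it, plus a config object shaped as in the comment after configure() above:
#
#   env = CrowdSimVarNum()
#   env.configure(config)
#   env.thisSeed = 0       # seeding fields are set externally, after construction
#   env.nenv = 1
#   ob = env.reset(phase='test', test_case=0)
#   done = False
#   while not done:
#       action = env.robot.act(ob)             # query the robot's policy
#       ob, reward, done, info = env.step(action)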
import numpy as np
import rospy
from geometry_msgs.msg import PoseStamped, Twist, Vector3
from nav_msgs.msg import Odometry
from std_msgs.msg import ColorRGBA
from visualization_msgs.msg import Marker

# Project-local import; the module path is assumed from the usual CrowdNav-style layout.
from crowd_sim.envs.utils.robot import Robot


class NN_tb3():
    def __init__(self, env, env_config, policy):
        self.env = env
        self.env_config = env_config
        # configure robot
        self.robot = Robot(env_config, 'robot')
        self.robot.set_policy(policy)

        self.env.set_robot(self.robot)  # pass robot parameters into env
        # initialize some parameters from the .config file, such as time_step and
        # success_reward, for other instances
        self.ob = env.reset('test', 1)
        self.policy = policy
        self.policy.set_env(env)

        # for subscribers
        self.pose = PoseStamped()
        self.vel = Vector3()
        self.psi = 0.0

        # for publishers
        self.global_goal = PoseStamped()
        self.goal = PoseStamped()
        self.desired_position = PoseStamped()
        self.desired_action = np.zeros((2,))

        # publishers
        self.pub_twist = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
        self.pub_pose_marker = rospy.Publisher('', Marker, queue_size=1)
        # self.pub_agent_markers = rospy.Publisher('~agent_markers', MarkerArray, queue_size=1)
        # self.pub_path_marker = rospy.Publisher('~path_marker', Marker, queue_size=1)
        # self.pub_goal_path_marker = rospy.Publisher('~goal_path_marker', Marker, queue_size=1)

        # subscribers
        self.sub_pose = rospy.Subscriber('/odom', Odometry, self.cbPose)
        self.sub_global_goal = rospy.Subscriber('/goal', PoseStamped, self.cbGlobalGoal)
        # self.sub_subgoal = rospy.Subscriber('~subgoal', PoseStamped, self.cbSubGoal)

        # subgoals
        self.sub_goal = Vector3()

        # self.sub_clusters = rospy.Subscriber('~clusters', Clusters, self.cbClusters)

        # control timer
        # self.control_timer = rospy.Timer(rospy.Duration(0.01), self.cbControl)
        self.nn_timer = rospy.Timer(rospy.Duration(0.3), self.cbComputeActionCrowdNav)

    def cbGlobalGoal(self, msg):
        self.stop_moving_flag = True
        self.new_global_goal_received = True
        self.global_goal = msg
        self.goal.pose.position.x = msg.pose.position.x
        self.goal.pose.position.y = msg.pose.position.y
        self.goal.header = msg.header

        # reset subgoals
        print("new goal: " + str([self.goal.pose.position.x, self.goal.pose.position.y]))

    def cbSubGoal(self, msg):
        self.sub_goal.x = msg.pose.position.x
        self.sub_goal.y = msg.pose.position.y
        # print("new subgoal: " + str(self.sub_goal))

    def cbPose(self, msg):
        self.cbVel(msg)
        q = msg.pose.pose.orientation
        # yaw from quaternion, bounded by [-pi, pi]
        self.psi = np.arctan2(2.0 * (q.w * q.z + q.x * q.y), 1 - 2 * (q.y * q.y + q.z * q.z))
        self.pose = msg.pose
        # self.visualize_pose(msg.pose.pose.position, msg.pose.pose.orientation)

    def cbVel(self, msg):
        self.vel = msg.twist.twist.linear

    # note: references an external `agent` module; its subscriber above is currently disabled
    def cbClusters(self, msg):
        other_agents = []

        xs = []
        ys = []
        radii = []
        labels = []
        num_clusters = len(msg.labels)

        for i in range(num_clusters):
            index = msg.labels[i]
            x = msg.mean_points[i].x
            y = msg.mean_points[i].y
            v_x = msg.velocities[i].x
            v_y = msg.velocities[i].y
            radius = self.obst_rad

            xs.append(x)
            ys.append(y)
            radii.append(radius)
            labels.append(index)
            # self.visualize_other_agent(x, y, radius, msg.labels[i])
            # helper fields
            heading_angle = np.arctan2(v_y, v_x)
            pref_speed = np.linalg.norm(np.array([v_x, v_y]))
            goal_x = x + 5.0
            goal_y = y + 5.0

            if pref_speed < 0.2:
                pref_speed = 0
                v_x = 0
                v_y = 0
            other_agents.append(agent.Agent(x, y, goal_x, goal_y, radius, pref_speed, heading_angle, index))
        self.visualize_other_agents(xs, ys, radii, labels)
        self.other_agents_state = other_agents

    def stop_moving(self):
        twist = Twist()
        self.pub_twist.publish(twist)

    def update_action(self, action):
        # print('update action')
        self.desired_action = action
        self.desired_position.pose.position.x = self.pose.pose.position.x + 1 * action[0] * np.cos(action[1])
        self.desired_position.pose.position.y = self.pose.pose.position.y + 1 * action[0] * np.sin(action[1])
    def cbControl(self, event):
        if self.goal.header.stamp == rospy.Time(0) or \
                (self.stop_moving_flag and not self.new_global_goal_received):
            self.stop_moving()
            return
        elif self.operation_mode.mode == self.operation_mode.NN:
            desired_yaw = self.desired_action[1]
            yaw_error = desired_yaw - self.psi
            if abs(yaw_error) > np.pi:
                yaw_error -= np.sign(yaw_error) * 2 * np.pi

            gain = 1.3  # canon: 2
            vw = gain * yaw_error

            use_d_min = True
            if False:  # canon: True
                # use_d_min = True
                # print("vmax:", self.find_vmax(self.d_min, yaw_error))
                vx = min(self.desired_action[0], self.find_vmax(self.d_min, yaw_error))
            else:
                vx = self.desired_action[0]

            twist = Twist()
            twist.angular.z = vw
            twist.linear.x = vx
            self.pub_twist.publish(twist)
            self.visualize_action(use_d_min)
            return

        elif self.operation_mode.mode == self.operation_mode.SPIN_IN_PLACE:
            print('Spinning in place.')
            self.stop_moving_flag = False
            angle_to_goal = np.arctan2(self.global_goal.pose.position.y - self.pose.pose.position.y,
                                       self.global_goal.pose.position.x - self.pose.pose.position.x)
            global_yaw_error = self.psi - angle_to_goal
            if abs(global_yaw_error) > 0.5:
                vx = 0.0
                vw = 1.0
                twist = Twist()
                twist.angular.z = vw
                twist.linear.x = vx
                self.pub_twist.publish(twist)
                # print(twist)
            else:
                print('Done spinning in place')
                self.operation_mode.mode = self.operation_mode.NN
                # self.new_global_goal_received = False
            return
        else:
            self.stop_moving()
            return

    def cbComputeActionCrowdNav(self, event):
        robot_x = self.pose.pose.position.x
        robot_y = self.pose.pose.position.y
        # goal
        goal_x = self.global_goal.pose.position.x
        goal_y = self.global_goal.pose.position.y
        # velocity
        robot_vx = self.vel.x
        robot_vy = self.vel.y
        # orientation
        theta = self.psi
        robot_radius = 0.3

        # set robot info
        self.robot.set(robot_x, robot_y, goal_x, goal_y, robot_vx, robot_vy, theta, robot_radius)

        # obstacle: position, velocity, radius
        # position
        # obstacle_x = [0.1, 0.2, 0.3, 0.4, 0.5]
        # obstacle_y = [0.1, 0.2, 0.3, 0.4, 0.5]
        # # velocity
        # obstacle_vx = [0.1, 0.2, 0.3, 0.4, 0.5]
        # obstacle_vy = [0.1, 0.2, 0.3, 0.4, 0.5]

        obstacle_x = [-6.0, -6.0, -6.0, -6.0, -6.0]
        obstacle_y = [-6.0, -6.0, -6.0, -6.0, -6.0]
        # velocity
        obstacle_vx = [0.0, 0.0, 0.0, 0.0, 0.0]
        obstacle_vy = [0.0, 0.0, 0.0, 0.0, 0.0]
        obstacle_radius = 0.3

        # initialize obstacle instances and set values
        for i in range(self.env_config.getint('sim', 'human_num')):
            self.env.humans[i].set(obstacle_x[i], obstacle_y[i], goal_x, goal_y,
                                   obstacle_vx[i], obstacle_vy[i], theta, obstacle_radius)
            self.ob[i] = self.env.humans[i].get_observable_state()

        # ************************************ Output ************************************
        # get action info
        action = self.robot.act(self.ob)
        position = self.robot.get_observable_state()

        print('\n---------\nrobot position (X,Y):', position.position)
        print(action)
        print(theta)

        # self.update_action(action)

    def update_subgoal(self, subgoal):
        self.goal.pose.position.x = subgoal[0]
        self.goal.pose.position.y = subgoal[1]
    def visualize_pose(self, pos, orientation):
        # Yellow box for the vehicle
        marker = Marker()
        marker.header.stamp = rospy.Time.now()
        marker.header.frame_id = 'map'
        marker.ns = 'agent'
        marker.id = 0
        marker.type = marker.CUBE
        marker.action = marker.ADD
        marker.pose.position = pos
        marker.pose.orientation = orientation
        marker.scale = Vector3(x=0.7, y=0.42, z=1)
        marker.color = ColorRGBA(r=1.0, g=1.0, a=1.0)
        marker.lifetime = rospy.Duration(1.0)
        self.pub_pose_marker.publish(marker)

        # Red track for the trajectory over time
        marker = Marker()
        marker.header.stamp = rospy.Time.now()
        marker.header.frame_id = 'map'
        marker.ns = 'agent'
        marker.id = self.num_poses
        marker.type = marker.CUBE
        marker.action = marker.ADD
        marker.pose.position = pos
        marker.pose.orientation = orientation
        marker.scale = Vector3(x=0.2, y=0.2, z=0.2)
        marker.color = ColorRGBA(r=1.0, a=1.0)
        marker.lifetime = rospy.Duration(10.0)
        self.pub_pose_marker.publish(marker)

    def on_shutdown(self):
        # the node name fills the %s placeholders in the log messages
        rospy.loginfo("[%s] Shutting down.", rospy.get_name())
        self.stop_moving()
        rospy.loginfo("Stopped %s's velocity.", rospy.get_name())
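# A minimal launch sketch for this node. It assumes the env, env_config, and policy
# objects are built elsewhere (e.g. by the repo's config/policy loading code); the names
# below are placeholders:
#
#   if __name__ == '__main__':
#       rospy.init_node('nn_tb3', anonymous=False)
#       nn_tb3 = NN_tb3(env, env_config, policy)
#       rospy.on_shutdown(nn_tb3.on_shutdown)
#       rospy.spin()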
# Standalone test fragment (mirrors cbComputeActionCrowdNav above); it assumes robot_x,
# robot_y, goal_x, goal_y, env, env_config, ob, and robot were defined earlier in the script.

# velocity
robot_vx = 1
robot_vy = 2
# orientation
theta = 1
robot_radius = 0.3

# set robot info
robot.set(robot_x, robot_y, goal_x, goal_y, robot_vx, robot_vy, theta, robot_radius)

# obstacle: position, velocity, radius
# position
obstacle_x = [0.1, 0.2, 0.3, 0.4, 0.5]
obstacle_y = [0.1, 0.2, 0.3, 0.4, 0.5]
# velocity
obstacle_vx = [0.1, 0.2, 0.3, 0.4, 0.5]
obstacle_vy = [0.1, 0.2, 0.3, 0.4, 0.5]
obstacle_radius = 0.3

# initialize obstacle instances and set values
for i in range(env_config.getint('sim', 'human_num')):
    env.humans[i].set(obstacle_x[i], obstacle_y[i], goal_x, goal_y,
                      obstacle_vx[i], obstacle_vy[i], theta, obstacle_radius)
    ob[i] = env.humans[i].get_observable_state()

# ************************************ Output ************************************
# get action info
action = robot.act(ob)
position = robot.get_observable_state()
print('robot position (X,Y):', position.px, ",", position.py)
print('robot velocity (X,Y):', action.vx, ",", action.vy)
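# A sketch of the setup such a fragment presumes (construction details are assumptions;
# the env here is a configparser-driven variant, matching env_config.getint above):
#
#   import configparser
#   env_config = configparser.RawConfigParser()
#   env_config.read('env.config')
#   env = ...            # a configured CrowdSim subclass, as in the rollout sketch above
#   robot = env.robot    # set via env.set_robot(...)
#   ob = env.reset('test', 1)
#   robot_x, robot_y = 0.0, 0.0   # illustrative pose
#   goal_x, goal_y = 4.0, 4.0     # illustrative goal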