def estimate_reward(self, state, action): """ If the time step is small enough, it's okay to model agent as linear movement during this period """ # collision detection if isinstance(state, list) or isinstance(state, tuple): state = tensor_to_joint_state(state) human_states = state.human_states robot_state = state.robot_state dmin = float('inf') collision = False for i, human in enumerate(human_states): px = human.px - robot_state.px py = human.py - robot_state.py if self.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + robot_state.theta) vy = human.vy - action.v * np.sin(action.r + robot_state.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist( px, py, ex, ey, 0, 0) - human.radius - robot_state.radius if closest_dist < 0: collision = True break elif closest_dist < dmin: dmin = closest_dist # check if reaching the goal if self.kinematics == 'holonomic': px = robot_state.px + action.vx * self.time_step py = robot_state.py + action.vy * self.time_step else: theta = robot_state.theta + action.r px = robot_state.px + np.cos(theta) * action.v * self.time_step py = robot_state.py + np.sin(theta) * action.v * self.time_step end_position = np.array((px, py)) reaching_goal = norm( end_position - np.array([robot_state.gx, robot_state.gy])) < robot_state.radius if collision: reward = -0.25 elif reaching_goal: reward = 1 elif dmin < 0.2: # adjust the reward based on FPS reward = (dmin - 0.2) * 0.5 * self.time_step else: reward = 0 return reward
def step(self, action, simple=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ human_actions = [] for human in self.humans: # observation for humans is always coordinates ob = [ other_human.get_observable_state() for other_human in self.humans if other_human != human ] if self.robot.visible: ob += [self.robot.get_observable_state()] human_actions.append(human.act(ob)) # collision detection dmin = float('inf') collision = False for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist( px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist)) break elif closest_dist < dmin: dmin = closest_dist # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx**2 + dy**2)**( 1 / 2) - self.humans[i].radius - self.humans[j].radius # check if reaching the goal end_position = np.array( self.robot.compute_position(action, self.time_step)) dist_goal = norm(end_position - np.array(self.robot.get_goal_position())) reaching_goal = dist_goal < self.robot.radius if self.global_time >= self.time_limit - 1: reward = 0 done = True info = Timeout() elif collision: reward = self.collision_penalty done = True info = Collision() elif reaching_goal: reward = self.success_reward done = True info = ReachGoal() elif dmin < self.discomfort_dist: # only penalize agent for getting too close if it's visible # adjust the reward based on FPS reward = ( dmin - self.discomfort_dist ) * self.discomfort_penalty_factor * self.time_step - self.time_r done = False info = Danger(dmin) else: reward = -0.1 * dist_goal done = False info = Nothing() # store state, action value and attention weights self.states.append([ self.robot.get_full_state(), [human.get_full_state() for human in self.humans] ]) if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append( self.robot.policy.get_attention_weights()) # update all agents self.robot.step(action) for i, human_action in enumerate(human_actions): self.humans[i].step(human_action) self.global_time += self.time_step for i, human in enumerate(self.humans): # only record the first time the human reaches the goal if self.human_times[i] == 0 and human.reached_destination(): self.human_times[i] = self.global_time # compute the observation if simple: return self.robot.get_full_state() if self.robot.sensor == 'coordinates': ob = [ self.robot.get_full_state() + human.get_observable_state() for human in self.humans ] elif self.robot.sensor == 'RGB': raise NotImplementedError return ob, reward, done, info
def step(self, action, update=True, rebuild=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ human_actions = [] for human in self.humans: # observation for humans is always coordinates ob = [other_human.get_observable_state() for other_human in self.humans if other_human != human] if self.robot.visible: ob += [self.robot.get_observable_state()] human_actions.append(human.act(ob)) # collision detection dmin = float('inf') collision = False for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist)) break elif closest_dist < dmin: dmin = closest_dist # begin = time() self.map.build_map_cpu(self.humans, rebuild) # if not update: # print("rebuild: ", rebuild, "build map time: ", 81 * (time() - begin)) agent_fullstate = FullState(self.robot.px + action.vx * self.agent_timestep, self.robot.py + action.vy * self.agent_timestep, self.robot.vx, self.robot.vy, self.robot.radius, self.robot.gx, self.robot.gy, self.robot.v_pref, self.robot.theta) # begin = time() collision_probabbility = self.map.compute_occupied_probability(agent_fullstate) # if not update: # print("rebuild: ", rebuild, "build map time: ", 81 * (time() - begin)) stationary_dist = ((self.robot.py + self.circle_radius)**2 + (self.robot.px**2))**(1 / 2) # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx**2 + dy**2)**(1 / 2) - self.humans[i].radius - self.humans[j].radius if dist < 0: # detect collision but don't take humans' collision into account logging.debug('Collision happens between humans in step()') # check if reaching the goal end_position = np.array(self.robot.compute_position(action, self.time_step)) reaching_goal = norm(end_position - np.array(self.robot.get_goal_position())) < self.robot.radius stationary_state = False if self.robot.kinematics == 'holonomic': if abs(action.vx) <= 0.0001 and abs(action.vy) <= 0.0001: stationary_state = True else: if abs(action.v) <= 0.0001: stationary_state = True if self.global_time >= self.time_limit - 1: reward = 0 done = True info = Timeout() elif collision: reward = self.collision_penalty done = True info = Collision() elif reaching_goal: reward = self.success_reward done = True info = ReachGoal() else: reward = -0.25 * collision_probabbility * self.reward_increment done = False if dmin < self.discomfort_dist: info = Danger(dmin) else: info = Nothing() if update: # store state, action value and attention weights self.states.append([self.robot.get_full_state(), [human.get_full_state() for human in self.humans]]) if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append(self.robot.policy.get_attention_weights()) # update all agents self.robot.step(action) for i, human_action in enumerate(human_actions): self.humans[i].step(human_action) self.global_time += self.time_step for i, human in enumerate(self.humans): # only record the first time the human reaches the goal if self.human_times[i] == 0 and human.reached_destination(): self.human_times[i] = self.global_time # compute the observation if self.robot.sensor == 'coordinates': ob = [human.get_observable_state() for human in self.humans] elif self.robot.sensor == 'RGB': raise NotImplementedError else: if self.robot.sensor == 'coordinates': ob = [human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions)] elif self.robot.sensor == 'RGB': raise NotImplementedError # self.agent_prev_vx = action.vx # self.agent_prev_vy = action.vy if self.robot.kinematics == 'holonomic': self.agent_prev_vx = action.vx self.agent_prev_vy = action.vy else: self.agent_prev_vx = action.v * np.cos(action.r + self.robot.theta) self.agent_prev_vy = action.v * np.sin(action.r + self.robot.theta) return ob, reward, done, info
def step(self, action, update=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ # get human actions if self.centralized_planning: agent_states = [human.get_full_state() for human in self.humans] if self.robot.visible: agent_states.append(self.robot.get_full_state()) human_actions = self.centralized_planner.predict(agent_states)[:-1] else: human_actions = self.centralized_planner.predict(agent_states) else: human_actions = [] for human in self.humans: ob = self.compute_observation_for(human) human_actions.append(human.act(ob)) # social score violation calculation heading_rect_violations = 0 robot_rect = AgentHeadingRect(self.robot.px, self.robot.py, self.robot.radius, self.robot.vx, self.robot.vy, self.robot.kinematics, action) for human in self.humans: human_rect = AgentHeadingRect(human.px, human.py, human.radius, human.vx, human.vy, human.kinematics) if robot_rect.intersects(human_rect): heading_rect_violations += 1 # collision detection dmin = float('inf') collision = False for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True logging.debug("Collision: distance between robot and p{} is {:.2E} at time {:.2E}".format(human.id, closest_dist, self.global_time)) break elif closest_dist < dmin: dmin = closest_dist # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx ** 2 + dy ** 2) ** (1 / 2) - self.humans[i].radius - self.humans[j].radius if dist < 0: # detect collision but don't take humans' collision into account logging.debug('Collision happens between humans in step()') # check if reaching the goal end_position = np.array(self.robot.compute_position(action, self.time_step)) reaching_goal = norm(end_position - np.array(self.robot.get_goal_position())) < self.robot.radius # calculate step jerk cost ax = action.vx - self.robot.vx ay = action.vy - self.robot.vy d_ax = ax - self.last_acceleration[0] d_ay = ay - self.last_acceleration[1] jerk_cost = d_ax**2 + d_ay**2 self.last_acceleration = (ax, ay) # check for left-hand / right-hand rules side_preference = None if self.test_scenario in self.all_two_agent_scenarios: h = self.humans[0] # If agent's y position is within the y range occupied by the other human (they are on the same vertical position) if ((h.py - h.radius) <= end_position[1] <= (h.py + h.radius)): if end_position[0] < h.px: side_preference = 0 else: side_preference = 1 # state information logging info = {} info['aggregated_time'] = 1 info['min_separation'] = dmin info['social_violation_cnt'] = heading_rect_violations info['jerk_cost'] = jerk_cost info['speed'] = math.sqrt(action.vx**2 + action.vy**2) info['side_preference'] = side_preference if dmin < self.min_personal_space: info['personal_violation_cnt'] = 1 else: info['personal_violation_cnt'] = 0 if self.global_time >= self.time_limit - 1: info['aggregated_time'] = math.inf reward = 0 done = True info['event'] = Timeout() elif collision: info['aggregated_time'] = math.inf reward = self.collision_penalty done = True info['event'] = Collision() elif reaching_goal: reward = self.success_reward done = True info['event'] = ReachGoal() elif dmin < self.discomfort_dist: # adjust the reward based on FPS reward = (dmin - self.discomfort_dist) * self.discomfort_penalty_factor * self.time_step done = False info['event'] = Discomfort(dmin) else: reward = 0 done = False info['event'] = Nothing() for human in self.humans: if not human.reached_destination(): info['aggregated_time'] += 1 # update environment if update: # store state, action value and attention weights if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append(self.robot.policy.get_attention_weights()) if hasattr(self.robot.policy, 'get_matrix_A'): self.As.append(self.robot.policy.get_matrix_A()) if hasattr(self.robot.policy, 'get_feat'): self.feats.append(self.robot.policy.get_feat()) if hasattr(self.robot.policy, 'get_X'): self.Xs.append(self.robot.policy.get_X()) if hasattr(self.robot.policy, 'traj'): self.trajs.append(self.robot.policy.get_traj()) # update all agents self.robot.step(action) for human, action in zip(self.humans, human_actions): human.step(action) if self.nonstop_human and human.reached_destination(): self.generate_human(human) self.global_time += self.time_step self.states.append([self.robot.get_full_state(), [human.get_full_state() for human in self.humans], [human.id for human in self.humans]]) self.robot_actions.append(action) self.rewards.append(reward) self.infos.append(info) # compute the observation if self.robot.sensor == 'coordinates': ob = self.compute_observation_for(self.robot) elif self.robot.sensor == 'RGB': raise NotImplementedError else: if self.robot.sensor == 'coordinates': ob = [human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions)] elif self.robot.sensor == 'RGB': raise NotImplementedError return ob, reward, done, info
def step(self, action, update=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ if self.centralized_planning: agent_states = [human.get_full_state() for human in self.humans] if self.robot.visible: agent_states.append(self.robot.get_full_state()) human_actions = self.centralized_planner.predict( agent_states)[:-1] else: human_actions = self.centralized_planner.predict(agent_states) else: human_actions = [] for human in self.humans: ob = self.compute_observation_for(human) human_actions.append(human.act(ob)) # collision detection dmin = float('inf') collision = False for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist( px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True logging.debug( "Collision: distance between robot and p{} is {:.2E} at time {:.2E}" .format(human.id, closest_dist, self.global_time)) break elif closest_dist < dmin: dmin = closest_dist # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx**2 + dy**2)**( 1 / 2) - self.humans[i].radius - self.humans[j].radius if dist < 0: # detect collision but don't take humans' collision into account logging.debug('Collision happens between humans in step()') # check if reaching the goal end_position = np.array( self.robot.compute_position(action, self.time_step)) reaching_goal = norm( end_position - np.array(self.robot.get_goal_position())) < self.robot.radius if self.global_time >= self.time_limit - 1: reward = 0 done = True info = Timeout() elif collision: reward = self.collision_penalty done = True info = Collision() elif reaching_goal: reward = self.success_reward done = True info = ReachGoal() elif dmin < self.discomfort_dist: # adjust the reward based on FPS reward = (dmin - self.discomfort_dist ) * self.discomfort_penalty_factor * self.time_step done = False info = Discomfort(dmin) else: reward = 0 done = False info = Nothing() if update: # store state, action value and attention weights if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append( self.robot.policy.get_attention_weights()) if hasattr(self.robot.policy, 'get_matrix_A'): self.As.append(self.robot.policy.get_matrix_A()) if hasattr(self.robot.policy, 'get_feat'): self.feats.append(self.robot.policy.get_feat()) if hasattr(self.robot.policy, 'get_X'): self.Xs.append(self.robot.policy.get_X()) if hasattr(self.robot.policy, 'traj'): self.trajs.append(self.robot.policy.get_traj()) # update all agents self.robot.step(action) for human, action in zip(self.humans, human_actions): human.step(action) if self.nonstop_human and human.reached_destination(): self.generate_human(human) self.global_time += self.time_step self.states.append([ self.robot.get_full_state(), [human.get_full_state() for human in self.humans], [human.id for human in self.humans] ]) self.robot_actions.append(action) self.rewards.append(reward) # compute the observation if self.robot.sensor == 'coordinates': ob = self.compute_observation_for(self.robot) elif self.robot.sensor == 'RGB': raise NotImplementedError else: if self.robot.sensor == 'coordinates': ob = [ human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions) ] elif self.robot.sensor == 'RGB': raise NotImplementedError return ob, reward, done, info
def step(self, action, non_attentive_humans, update=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ human_actions = [] # print(self.global_time) # count = self.global_time # non_attentive_humans = old_non_attentive_humans # if count != 0: # # old_non_attentive_humans = [] # # else: # old_non_attentive_humans = non_attentive_humans # # only record the first time the human reaches the goal # # if count % 4 == 0: # print("true") # # non_attentive_humans = random.sample(self.humans, int(len(self.humans) / 2)) # old_non_attentive_humans = non_attentive_humans # # # else: # # non_attentive_humans = old_non_attentive_humans # non_attentive_humans = set(non_attentive_humans) for i, human in enumerate(self.humans): # observation for humans is always coordinates # ob = [other_human.get_observable_state() for other_human in self.humans if other_human != human] if human not in non_attentive_humans: # print("attentive") human.set_attentive() ob = [ other_human.get_observable_state() for other_human in self.humans if other_human != human ] # if self.robot.visible: ob += [self.robot.get_observable_state()] elif human in non_attentive_humans: # print("disattentive") ob = [ other_human.get_observable_state() for other_human in self.humans if other_human != human ] human.set_non_attentive() # print(human.get_observable_state()) human_actions.append(human.act(ob)) if human.reached_destination(): if np.random.random() > 0.5: sign = -1 else: sign = 1 human.set(human.px, human.py, np.random.random() * self.square_width * 0.5 * -sign, (np.random.random() - 0.5) * self.square_width, 0, 0, 0) # collision detection dmin = float('inf') collision = False ppl_count = 0 closest_distance_all = [] for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist( px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist)) # break elif closest_dist < dmin: dmin = closest_dist elif closest_dist < 3: ppl_count += 1 # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx**2 + dy**2)**( 1 / 2) - self.humans[i].radius - self.humans[j].radius if dist < 0: # detect collision but don't take humans' collision into account logging.debug('Collision happens between humans in step()') # check if reaching the goal end_position = np.array( self.robot.compute_position(action, self.time_step)) reaching_goal = norm( end_position - np.array(self.robot.get_goal_position())) < self.robot.radius if self.global_time >= self.time_limit - 1: reward = 0 done = True info = Timeout() elif collision: reward = self.collision_penalty done = True info = Collision() elif reaching_goal: reward = self.success_reward done = True info = ReachGoal() elif dmin < self.discomfort_dist: # only penalize agent for getting too close if it's visible # adjust the reward based on FPS reward = (dmin - self.discomfort_dist ) * self.discomfort_penalty_factor * self.time_step done = False info = Danger(dmin) else: reward = 0 done = False info = Nothing() if update: # store state, action value and attention weights self.states.append([ self.robot.get_full_state(), [human.get_full_state() for human in self.humans] ]) if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append( self.robot.policy.get_attention_weights()) # update all agents self.robot.step(action) for i, human_action in enumerate(human_actions): self.humans[i].step(human_action) self.global_time += self.time_step for i, human in enumerate(self.humans): # only record the first time the human reaches the goal if self.human_times[i] == 0 and human.reached_destination(): self.human_times[i] = self.global_time # compute the observation if self.robot.sensor == 'coordinates': ob = [human.get_observable_state() for human in self.humans] elif self.robot.sensor == 'RGB': raise NotImplementedError else: if self.robot.sensor == 'coordinates': ob = [ human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions) ] elif self.robot.sensor == 'RGB': raise NotImplementedError return ob, reward, done, info, ppl_count, self.robot.get_position( ), self.robot.get_velocity(), dmin
def estimate_reward_on_predictor(self, state, next_state): """ If the time step is small enough, it's okay to model agent as linear movement during this period """ # collision detection info = Nothing() if isinstance(state, list) or isinstance(state, tuple): state = tensor_to_joint_state(state) human_states = state.human_states robot_state = state.robot_state weight_goal = self.goal_factor weight_safe = self.discomfort_penalty_factor weight_terminal = 1.0 re_collision = self.collision_penalty re_arrival = self.success_reward next_robot_state = next_state.robot_state next_human_states = next_state.human_states cur_position = np.array((robot_state.px, robot_state.py)) end_position = np.array((next_robot_state.px, next_robot_state.py)) goal_position = np.array((robot_state.gx, robot_state.gy)) reward_goal = (norm(cur_position - goal_position) - norm(end_position - goal_position)) # check if reaching the goal reaching_goal = norm( end_position - np.array([robot_state.gx, robot_state.gy])) < robot_state.radius dmin = float('inf') collision = False safety_penalty = 0 if human_states is None or next_human_states is None: safety_penalty = 0.0 collision = False else: for i, human in enumerate(human_states): next_human = next_human_states[i] px = human.px - robot_state.px py = human.py - robot_state.py ex = next_human.px - next_robot_state.px ey = next_human.py - next_robot_state.py # closest distance between boundaries of two agents closest_dist = point_to_segment_dist( px, py, ex, ey, 0, 0) - human.radius - robot_state.radius if closest_dist < 0: collision = True if closest_dist < dmin: dmin = closest_dist if closest_dist < self.discomfort_dist: safety_penalty = safety_penalty + (closest_dist - self.discomfort_dist) # dis_begin = np.sqrt(px ** 2 + py ** 2) - human.radius - robot_state.radius # dis_end = np.sqrt(ex ** 2 + ey ** 2) - human.radius - robot_state.radius # penalty_begin = 0 # penalty_end = 0 # discomfort_dist = 0.5 # if dis_begin < discomfort_dist: # penalty_begin = dis_begin - discomfort_dist # if dis_end < discomfort_dist: # penalty_end = dis_end - discomfort_dist # safety_penalty = safety_penalty + (penalty_end - penalty_begin) reward_col = 0 reward_arrival = 0 if collision: reward_col = re_collision info = Collision() elif reaching_goal: reward_arrival = re_arrival info = ReachGoal() reward_terminal = reward_col + reward_arrival reward = weight_terminal * reward_terminal + weight_goal * reward_goal + weight_safe * safety_penalty # if collision: # reward = reward - 100 return reward, info