def _check_for_episode_termination(self, reset_rules_status, agents_info_map):
    '''Decide whether the current episode should terminate.

    Args:
        reset_rules_status: dict mapping reset rule name to its bool status
        agents_info_map: dict mapping agent name to that agent's info dict

    Returns:
        tuple (string, bool, bool): episode status, pause flag, and done flag
    '''
    episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
    pause, done = False, False
    # EPISODE_COMPLETE is checked before anything else because the agent
    # might crash right at the finish line, and completion must win.
    if reset_rules_status.get(EpisodeStatus.EPISODE_COMPLETE.value, False):
        done = True
        episode_status = EpisodeStatus.EPISODE_COMPLETE.value
    elif reset_rules_status.get(EpisodeStatus.CRASHED.value, False):
        # Check crash with all other objects besides static obstacles.
        agent_info = agents_info_map[self._agent_name_]
        crashed_object_name = agent_info[AgentInfo.CRASHED_OBJECT_NAME.value]
        if 'obstacle' in crashed_object_name:
            # Crashing into a static obstacle always triggers a phase change.
            done, pause = self._check_for_phase_change()
        else:
            # Only the car that is behind (lower progress) is held
            # responsible for the collision.
            current_progress = agent_info[AgentInfo.CURRENT_PROGRESS.value]
            crashed_object_progress = \
                agents_info_map[crashed_object_name][AgentInfo.CURRENT_PROGRESS.value]
            if current_progress < crashed_object_progress:
                done, pause = self._check_for_phase_change()
    elif any(reset_rules_status.values()):
        done, pause = self._check_for_phase_change()
    return episode_status, pause, done
def _check_for_episode_termination(self, reset_rules_status, agents_info_map):
    '''Determine whether the episode should be terminated.

    Args:
        reset_rules_status: dict mapping reset rule name to its bool status
        agents_info_map: dict mapping agent name to that agent's info dict

    Returns:
        tuple (string, bool, bool): episode status, pause flag, and done flag
    '''
    episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
    pause, done = False, False
    # EPISODE_COMPLETE is evaluated first because the agent might crash
    # exactly at the finish line, and completion takes precedence.
    if reset_rules_status.get(EpisodeStatus.EPISODE_COMPLETE.value, False):
        done = True
        episode_status = EpisodeStatus.EPISODE_COMPLETE.value
    elif reset_rules_status.get(EpisodeStatus.CRASHED.value, False):
        # Crashes are only acted on while the agent is in the RUN phase;
        # in any other phase the agent simply stays paused.
        agent_phase = self._ctrl_status[AgentCtrlStatus.AGENT_PHASE.value]
        if agent_phase == AgentPhase.RUN.value:
            self._curr_crashed_object_name = \
                agents_info_map[self._agent_name_][AgentInfo.CRASHED_OBJECT_NAME.value]
            if 'obstacle' in self._curr_crashed_object_name:
                # A crash into a static obstacle always triggers a phase change.
                done, pause = self._check_for_phase_change()
            else:
                # Crash with another moving object: compare progress along the
                # track, normalized to each car's own start position, so only
                # the car that is behind is held responsible.
                agent_progress = \
                    agents_info_map[self._agent_name_][AgentInfo.CURRENT_PROGRESS.value]
                other_info = agents_info_map[self._curr_crashed_object_name]
                other_progress = get_normalized_progress(
                    other_info[AgentInfo.CURRENT_PROGRESS.value],
                    start_ndist=other_info[AgentInfo.START_NDIST.value])
                agent_progress = get_normalized_progress(
                    agent_progress,
                    start_ndist=self._data_dict_['start_ndist'])
                if agent_progress < other_progress:
                    done, pause = self._check_for_phase_change()
                else:
                    # The other car hit us from behind; keep racing.
                    episode_status = EpisodeStatus.IN_PROGRESS.value
        else:
            pause = True
    elif any(reset_rules_status.values()):
        done, pause = self._check_for_phase_change()
    return episode_status, pause, done
def judge_action(self, action):
    '''Judge the action taken by the agent for the current simulation step.

    Updates reward parameters and step metrics from the agent's current
    position on the track, evaluates the user-provided reward function, and
    decides whether the episode is done.

    Args:
        action: the action taken by the agent for this step

    Returns:
        tuple (float, bool, dict): reward value, done flag, and step metrics

    Raises:
        GenericRolloutException: if the agent position/metrics cannot be
            computed
        RewardFunctionError: if the reward function raises or returns a
            non-finite value
    '''
    try:
        # Get the position of the agent
        pos_dict = self._track_data_.get_agent_pos(
            self._agent_name_, self._agent_link_name_list_,
            const.RELATIVE_POSITION_OF_FRONT_OF_CAR)
        model_point = pos_dict[AgentPos.POINT.value]
        # Compute the next index
        current_ndist = self._track_data_.get_norm_dist(model_point)
        prev_index, next_index = self._track_data_.find_prev_next_waypoints(
            current_ndist, normalized=True, reverse_dir=self._reverse_dir_)
        # Set the basic reward and training metrics
        set_reward_and_metrics(self._reward_params_, self._step_metrics_,
                               pos_dict, self._track_data_, next_index,
                               prev_index, action, self._json_actions_)
        # Convert current progress to be [0,100] starting at the initial waypoint
        if self._reverse_dir_:
            # Driving in reverse flips the left-of-center flag.
            self._reward_params_[const.RewardParam.LEFT_CENT.value[0]] = \
                not self._reward_params_[const.RewardParam.LEFT_CENT.value[0]]
            current_progress = self._data_dict_['start_ndist'] - current_ndist
        else:
            current_progress = current_ndist - self._data_dict_['start_ndist']
        current_progress = compute_current_prog(current_progress,
                                                self._data_dict_['prev_progress'])
        self._data_dict_['steps'] += 1
        # Add the agent specific metrics
        self._step_metrics_[StepMetrics.STEPS.value] = \
            self._reward_params_[const.RewardParam.STEPS.value[0]] = \
            self._data_dict_['steps']
        self._reward_params_[const.RewardParam.REVERSE.value[0]] = \
            self._reverse_dir_
        self._step_metrics_[StepMetrics.PROG.value] = \
            self._reward_params_[const.RewardParam.PROG.value[0]] = \
            current_progress
    except Exception as ex:
        # Chain the original exception so the root cause/traceback is kept.
        raise GenericRolloutException(
            'Cannot find position: {}'.format(ex)) from ex
    # This code should be replaced with the contact sensor code
    is_crashed = False
    model_heading = self._reward_params_[const.RewardParam.HEADING.value[0]]
    obstacle_reward_params = self._track_data_.get_object_reward_params(
        model_point, model_heading, current_progress, self._reverse_dir_)
    if obstacle_reward_params:
        self._reward_params_.update(obstacle_reward_params)
        is_crashed = self._track_data_.is_racecar_collided(
            pos_dict[AgentPos.LINK_POINTS.value])
    prev_pnt_dist = min(
        model_point.distance(self._prev_waypoints_['prev_point']),
        model_point.distance(self._prev_waypoints_['prev_point_2']))
    is_off_track = not any(self._track_data_.points_on_track(
        pos_dict[AgentPos.LINK_POINTS.value]))
    # Only flag the car as stuck on a check interval so a momentary stall
    # does not end the episode.
    is_immobilized = (prev_pnt_dist <= 0.0001 and
                      self._data_dict_['steps'] %
                      const.NUM_STEPS_TO_CHECK_STUCK == 0)
    is_lap_complete = current_progress >= 100.0
    self._reward_params_[const.RewardParam.CRASHED.value[0]] = is_crashed
    self._reward_params_[const.RewardParam.OFFTRACK.value[0]] = is_off_track
    done = is_crashed or is_immobilized or is_off_track or is_lap_complete
    episode_status = EpisodeStatus.get_episode_status(
        is_crashed=is_crashed, is_immobilized=is_immobilized,
        is_off_track=is_off_track, is_lap_complete=is_lap_complete)
    try:
        # Deep copy so the user reward function cannot mutate our params.
        reward = float(self._reward_(copy.deepcopy(self._reward_params_)))
    except Exception as ex:
        # Chain the original exception so the root cause/traceback is kept.
        raise RewardFunctionError(
            'Reward function exception {}'.format(ex)) from ex
    if math.isnan(reward) or math.isinf(reward):
        raise RewardFunctionError('{} returned as reward'.format(reward))
    # Remember the last two positions to detect an immobilized car later.
    self._prev_waypoints_['prev_point_2'] = \
        self._prev_waypoints_['prev_point']
    self._prev_waypoints_['prev_point'] = model_point
    self._data_dict_['prev_progress'] = current_progress
    # Get the last of the step metrics
    self._step_metrics_[StepMetrics.REWARD.value] = reward
    self._step_metrics_[StepMetrics.DONE.value] = done
    self._step_metrics_[StepMetrics.TIME.value] = time.time()
    self._step_metrics_[StepMetrics.EPISODE_STATUS.value] = \
        episode_status.value
    return reward, done, self._step_metrics_