Ejemplo n.º 1
0
    def _check_for_episode_termination(self, reset_rules_status, agents_info_map):
        '''Check for whether a episode should be terminated

        Args:
            reset_rules_status: dictionary of reset rules status with key as reset rule names and value as
                                reset rule bool status
            agents_info_map: dictionary of agents info map with key as agent name and value as agent info

        Returns:
            tuple (string, bool, bool): episode status, pause flag, and done flag
        '''
        episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
        pause = False
        done = False
        # Note: check EPISODE_COMPLETE as the first item because agent might crash
        # at the finish line.
        if EpisodeStatus.EPISODE_COMPLETE.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.EPISODE_COMPLETE.value]:
            done = True
            episode_status = EpisodeStatus.EPISODE_COMPLETE.value
        elif EpisodeStatus.CRASHED.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.CRASHED.value]:
            # check crash with all other objects besides static obstacle
            crashed_object_name = agents_info_map[self._agent_name_][AgentInfo.CRASHED_OBJECT_NAME.value]
            if 'obstacle' not in crashed_object_name:
                current_progress = agents_info_map[self._agent_name_][AgentInfo.CURRENT_PROGRESS.value]
                crashed_object_progress = agents_info_map[crashed_object_name]\
                                              [AgentInfo.CURRENT_PROGRESS.value]
                if current_progress < crashed_object_progress:
                    done, pause = self._check_for_phase_change()
            else:
                done, pause = self._check_for_phase_change()
        elif any(reset_rules_status.values()):
            done, pause = self._check_for_phase_change()
        return episode_status, pause, done
    def _check_for_episode_termination(self, reset_rules_status,
                                       agents_info_map):
        '''Check for whether a episode should be terminated

        Args:
            reset_rules_status: dictionary of reset rules status with key as reset rule names and value as
                                reset rule bool status
            agents_info_map: dictionary of agents info map with key as agent name and value as agent info

        Returns:
            tuple (string, bool, bool): episode status, pause flag, and done flag
        '''
        episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
        pause = False
        done = False
        # Note: check EPISODE_COMPLETE as the first item because agent might crash
        # at the finish line.
        if EpisodeStatus.EPISODE_COMPLETE.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.EPISODE_COMPLETE.value]:
            done = True
            episode_status = EpisodeStatus.EPISODE_COMPLETE.value
        elif EpisodeStatus.CRASHED.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.CRASHED.value]:
            # only check for crash when at RUN phase
            if self._ctrl_status[
                    AgentCtrlStatus.AGENT_PHASE.value] == AgentPhase.RUN.value:
                self._curr_crashed_object_name = agents_info_map[
                    self._agent_name_][AgentInfo.CRASHED_OBJECT_NAME.value]
                # check crash with all other objects besides static obstacle
                if 'obstacle' not in self._curr_crashed_object_name:
                    current_progress = agents_info_map[self._agent_name_][
                        AgentInfo.CURRENT_PROGRESS.value]
                    crashed_obj_info = agents_info_map[
                        self._curr_crashed_object_name]
                    crashed_obj_progress = crashed_obj_info[
                        AgentInfo.CURRENT_PROGRESS.value]
                    crashed_obj_start_ndist = crashed_obj_info[
                        AgentInfo.START_NDIST.value]
                    crashed_object_progress = get_normalized_progress(
                        crashed_obj_progress,
                        start_ndist=crashed_obj_start_ndist)
                    current_progress = get_normalized_progress(
                        current_progress,
                        start_ndist=self._data_dict_['start_ndist'])
                    if current_progress < crashed_object_progress:
                        done, pause = self._check_for_phase_change()
                    else:
                        episode_status = EpisodeStatus.IN_PROGRESS.value
                else:
                    done, pause = self._check_for_phase_change()
            else:
                pause = True
        elif any(reset_rules_status.values()):
            done, pause = self._check_for_phase_change()
        return episode_status, pause, done
    def judge_action(self, action):
        try:
            # Get the position of the agent
            pos_dict = self._track_data_.get_agent_pos(
                self._agent_name_, self._agent_link_name_list_,
                const.RELATIVE_POSITION_OF_FRONT_OF_CAR)
            model_point = pos_dict[AgentPos.POINT.value]
            # Compute the next index
            current_ndist = self._track_data_.get_norm_dist(model_point)
            prev_index, next_index = self._track_data_.find_prev_next_waypoints(
                current_ndist, normalized=True, reverse_dir=self._reverse_dir_)
            # Set the basic reward and training metrics
            set_reward_and_metrics(self._reward_params_, self._step_metrics_,
                                   pos_dict, self._track_data_, next_index,
                                   prev_index, action, self._json_actions_)
            # Convert current progress to be [0,100] starting at the initial waypoint
            if self._reverse_dir_:
                self._reward_params_[const.RewardParam.LEFT_CENT.value[0]] = \
                    not self._reward_params_[const.RewardParam.LEFT_CENT.value[0]]
                current_progress = self._data_dict_[
                    'start_ndist'] - current_ndist
            else:
                current_progress = current_ndist - self._data_dict_[
                    'start_ndist']

            current_progress = compute_current_prog(
                current_progress, self._data_dict_['prev_progress'])
            self._data_dict_['steps'] += 1
            # Add the agen specific metrics
            self._step_metrics_[StepMetrics.STEPS.value] = \
            self._reward_params_[const.RewardParam.STEPS.value[0]] = self._data_dict_['steps']
            self._reward_params_[
                const.RewardParam.REVERSE.value[0]] = self._reverse_dir_
            self._step_metrics_[StepMetrics.PROG.value] = \
            self._reward_params_[const.RewardParam.PROG.value[0]] = current_progress
        except Exception as ex:
            raise GenericRolloutException(
                'Cannot find position: {}'.format(ex))

        # This code should be replaced with the contact sensor code
        is_crashed = False
        model_heading = self._reward_params_[
            const.RewardParam.HEADING.value[0]]

        obstacle_reward_params = self._track_data_.get_object_reward_params(
            model_point, model_heading, current_progress, self._reverse_dir_)
        if obstacle_reward_params:
            self._reward_params_.update(obstacle_reward_params)
            is_crashed = self._track_data_.is_racecar_collided(
                pos_dict[AgentPos.LINK_POINTS.value])

        prev_pnt_dist = min(
            model_point.distance(self._prev_waypoints_['prev_point']),
            model_point.distance(self._prev_waypoints_['prev_point_2']))

        is_off_track = not any(
            self._track_data_.points_on_track(
                pos_dict[AgentPos.LINK_POINTS.value]))
        is_immobilized = (prev_pnt_dist <= 0.0001
                          and self._data_dict_['steps'] %
                          const.NUM_STEPS_TO_CHECK_STUCK == 0)
        is_lap_complete = current_progress >= 100.0

        self._reward_params_[const.RewardParam.CRASHED.value[0]] = is_crashed
        self._reward_params_[
            const.RewardParam.OFFTRACK.value[0]] = is_off_track

        done = is_crashed or is_immobilized or is_off_track or is_lap_complete
        episode_status = EpisodeStatus.get_episode_status(
            is_crashed=is_crashed,
            is_immobilized=is_immobilized,
            is_off_track=is_off_track,
            is_lap_complete=is_lap_complete)
        try:
            reward = float(self._reward_(copy.deepcopy(self._reward_params_)))
        except Exception as ex:
            raise RewardFunctionError(
                'Reward function exception {}'.format(ex))
        if math.isnan(reward) or math.isinf(reward):
            raise RewardFunctionError('{} returned as reward'.format(reward))

        self._prev_waypoints_['prev_point_2'] = self._prev_waypoints_[
            'prev_point']
        self._prev_waypoints_['prev_point'] = model_point
        self._data_dict_['prev_progress'] = current_progress
        #Get the last of the step metrics
        self._step_metrics_[StepMetrics.REWARD.value] = reward
        self._step_metrics_[StepMetrics.DONE.value] = done
        self._step_metrics_[StepMetrics.TIME.value] = time.time()
        self._step_metrics_[
            StepMetrics.EPISODE_STATUS.value] = episode_status.value

        return reward, done, self._step_metrics_