Example #1
    def _check_for_episode_termination(self, reset_rules_status, agents_info_map):
        '''Check whether an episode should be terminated

        Args:
            reset_rules_status: dictionary of reset rules status with key as reset rule names and value as
                                reset rule bool status
            agents_info_map: dictionary of agents info map with key as agent name and value as agent info

        Returns:
            tuple (string, bool, bool): episode status, pause flag, and done flag
        '''
        episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
        pause = False
        done = False
        # Note: check EPISODE_COMPLETE as the first item because agent might crash
        # at the finish line.
        if EpisodeStatus.EPISODE_COMPLETE.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.EPISODE_COMPLETE.value]:
            done = True
            episode_status = EpisodeStatus.EPISODE_COMPLETE.value
        elif EpisodeStatus.CRASHED.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.CRASHED.value]:
            # check crash with all other objects besides static obstacle
            crashed_object_name = agents_info_map[self._agent_name_][AgentInfo.CRASHED_OBJECT_NAME.value]
            if 'obstacle' not in crashed_object_name:
                current_progress = agents_info_map[self._agent_name_][AgentInfo.CURRENT_PROGRESS.value]
                crashed_object_progress = agents_info_map[
                    crashed_object_name][AgentInfo.CURRENT_PROGRESS.value]
                if current_progress < crashed_object_progress:
                    done, pause = self._check_for_phase_change()
            else:
                done, pause = self._check_for_phase_change()
        elif any(reset_rules_status.values()):
            done, pause = self._check_for_phase_change()
        return episode_status, pause, done
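
For orientation, a sketch of the inputs this method consumes. The key strings
below are assumed stand-ins for the EpisodeStatus and AgentInfo enum values;
the snippet above does not pin them down.

# Hypothetical input shapes for _check_for_episode_termination.
reset_rules_status = {
    'episode_complete': False,  # assumed EpisodeStatus.EPISODE_COMPLETE.value
    'crashed': True,            # assumed EpisodeStatus.CRASHED.value
    'off_track': False,
}
agents_info_map = {
    'racecar': {'crashed_object_name': 'bot_car', 'current_progress': 42.0},
    'bot_car': {'current_progress': 55.0},
}
# The agent rear-ended a bot that is ahead (55.0 > 42.0), so the method defers
# to self._check_for_phase_change() for the pause/done decision; a crash into
# anything with 'obstacle' in its name skips the progress comparison entirely.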
Example #2
 def append_episode_metrics(self):
     self._number_of_trials_ += 1
     eval_metric = dict()
     eval_metric['completion_percentage'] = int(self._progress_)
     eval_metric['metric_time'] = int(round(time.time() * 1000))
     eval_metric['start_time'] = int(round(self._start_time_ * 1000))
     eval_metric['elapsed_time_in_milliseconds'] = \
         int(round((time.time() - self._start_time_) * 1000))
     eval_metric['trial'] = int(self._number_of_trials_)
     eval_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
         self._episode_status)
     eval_metric['crash_count'] = self.reset_count_dict[
         EpisodeStatus.CRASHED.value]
     eval_metric['immobilized_count'] = self.reset_count_dict[
         EpisodeStatus.IMMOBILIZED.value]
     eval_metric['off_track_count'] = self.reset_count_dict[
         EpisodeStatus.OFF_TRACK.value]
     eval_metric['reversed_count'] = self.reset_count_dict[
         EpisodeStatus.REVERSED.value]
     eval_metric['reset_count'] = eval_metric['crash_count'] + \
                                  eval_metric['immobilized_count'] + \
                                  eval_metric['off_track_count'] + \
                                  eval_metric['reversed_count']
     self._best_lap_time = min(eval_metric['elapsed_time_in_milliseconds'],
                               self._best_lap_time)
     self._total_evaluation_time += eval_metric[
         'elapsed_time_in_milliseconds']
     self._metrics_.append(eval_metric)
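
For reference, one record appended by this method has roughly the following
shape. All values are illustrative, and the status label text is an assumption
about what EpisodeStatus.get_episode_status_label returns.

# Illustrative shape of a single eval_metric entry (values made up):
example_eval_metric = {
    'completion_percentage': 100,
    'metric_time': 1600000020000,           # wall-clock epoch ms
    'start_time': 1600000000000,            # episode start, epoch ms
    'elapsed_time_in_milliseconds': 20000,  # lap time
    'trial': 3,
    'episode_status': 'Lap complete',       # assumed label text
    'crash_count': 0,
    'immobilized_count': 0,
    'off_track_count': 1,
    'reversed_count': 0,
    'reset_count': 1,  # sum of the four reset counters above
}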
Example #3
    def _check_for_episode_termination(self, reset_rules_status,
                                       agents_info_map):
        '''Check whether an episode should be terminated

        Args:
            reset_rules_status: dictionary of reset rules status with key as reset rule names and value as
                                reset rule bool status
            agents_info_map: dictionary of agents info map with key as agent name and value as agent info

        Returns:
            tuple (string, bool, bool): episode status, pause flag, and done flag
        '''
        episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
        pause = False
        done = False
        # Note: check EPISODE_COMPLETE as the first item because agent might crash
        # at the finish line.
        if EpisodeStatus.EPISODE_COMPLETE.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.EPISODE_COMPLETE.value]:
            done = True
            episode_status = EpisodeStatus.EPISODE_COMPLETE.value
        elif EpisodeStatus.CRASHED.value in reset_rules_status and \
                reset_rules_status[EpisodeStatus.CRASHED.value]:
            # only check for crash when at RUN phase
            if self._ctrl_status[
                    AgentCtrlStatus.AGENT_PHASE.value] == AgentPhase.RUN.value:
                self._curr_crashed_object_name = agents_info_map[
                    self._agent_name_][AgentInfo.CRASHED_OBJECT_NAME.value]
                # check crash with all other objects besides static obstacle
                if 'obstacle' not in self._curr_crashed_object_name:
                    current_progress = agents_info_map[self._agent_name_][
                        AgentInfo.CURRENT_PROGRESS.value]
                    crashed_obj_info = agents_info_map[
                        self._curr_crashed_object_name]
                    crashed_obj_progress = crashed_obj_info[
                        AgentInfo.CURRENT_PROGRESS.value]
                    crashed_obj_start_ndist = crashed_obj_info[
                        AgentInfo.START_NDIST.value]
                    crashed_object_progress = get_normalized_progress(
                        crashed_obj_progress,
                        start_ndist=crashed_obj_start_ndist)
                    current_progress = get_normalized_progress(
                        current_progress,
                        start_ndist=self._data_dict_['start_ndist'])
                    if current_progress < crashed_object_progress:
                        done, pause = self._check_for_phase_change()
                    else:
                        episode_status = EpisodeStatus.IN_PROGRESS.value
                else:
                    done, pause = self._check_for_phase_change()
            else:
                pause = True
        elif any(reset_rules_status.values()):
            done, pause = self._check_for_phase_change()
        return episode_status, pause, done
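
This variant differs from Example #1 in two ways: a crash is only judged
during the RUN phase (outside it the episode simply pauses), and both cars'
progress values are normalized to a common origin before they are compared. A
minimal sketch of what such a normalization could do, assuming progress lives
on a 0-100 loop and start_ndist is a 0-1 fraction of the track; the real
get_normalized_progress may differ.

def normalized_progress_sketch(progress, start_ndist):
    '''Hypothetical stand-in for get_normalized_progress: shift a car's
    progress by its starting offset so cars launched at different points
    on the loop become directly comparable.'''
    return (progress + start_ndist * 100.0) % 100.0

# Two cars physically side by side report the same normalized progress even
# though their raw progress values differ by their start offsets:
print(normalized_progress_sketch(40.0, 0.10))  # 50.0
print(normalized_progress_sketch(25.0, 0.25))  # 50.0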
Example #4
 def append_episode_metrics(self):
     self._episode_ += 1 if not self._is_eval_ else 0
     # Count evaluation trials only while in the evaluation phase
     self._eval_trials_ += 1 if self._is_eval_ else 0
     training_metric = dict()
     training_metric['reward_score'] = int(round(self._episode_reward_))
     training_metric['metric_time'] = int(round(self._current_sim_time * 1000))
     training_metric['start_time'] = int(round(self._start_time_ * 1000))
     training_metric['elapsed_time_in_milliseconds'] = \
         int(round((self._current_sim_time - self._start_time_) * 1000))
     training_metric['episode'] = int(self._episode_)
     training_metric['trial'] = int(self._eval_trials_)
     training_metric['phase'] = 'evaluation' if self._is_eval_ else 'training'
     training_metric['completion_percentage'] = int(self._progress_)
     training_metric['episode_status'] = EpisodeStatus.get_episode_status_label(self._episode_status)
     self._metrics_.append(training_metric)
Example #5
 def upload_episode_metrics(self):
     self._number_of_trials_ += 1
     eval_metric = dict()
     eval_metric['completion_percentage'] = int(self._progress_)
     eval_metric['metric_time'] = int(round(time.time() * 1000))
     eval_metric['start_time'] = int(round(self._start_time_ * 1000))
     eval_metric['elapsed_time_in_milliseconds'] = \
         int(round((time.time() - self._start_time_) * 1000))
     eval_metric['trial'] = int(self._number_of_trials_)
     eval_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
         self._episode_status)
     self._metrics_.append(eval_metric)
     write_metrics_to_s3(self._s3_dict_[MetricsS3Keys.METRICS_BUCKET.value],
                         self._s3_dict_[MetricsS3Keys.METRICS_KEY.value],
                         self._s3_dict_[MetricsS3Keys.REGION.value],
                         self._metrics_)
     self._simtrace_data_.upload_to_s3(self._number_of_trials_)
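
Compared with the plain append variant, upload_episode_metrics also writes the
accumulated list to S3 and uploads the sim-trace data. The _s3_dict_
presumably carries entries like the following; the key strings are assumptions
about the MetricsS3Keys enum values, and the bucket, key, and region are
placeholders.

# Hypothetical contents of self._s3_dict_ (key strings assumed):
s3_dict = {
    'metrics_bucket': 'my-deepracer-bucket',     # MetricsS3Keys.METRICS_BUCKET
    'metrics_key': 'metrics/eval-metrics.json',  # MetricsS3Keys.METRICS_KEY
    'region': 'us-east-1',                       # MetricsS3Keys.REGION
}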
Example #6
 def append_episode_metrics(self):
     self._episode_ += 1 if not self._is_eval_ else 0
     # Count evaluation trials only while in the evaluation phase
     self._eval_trials_ += 1 if self._is_eval_ else 0
     training_metric = dict()
     training_metric["reward_score"] = int(round(self._episode_reward_))
     training_metric["metric_time"] = int(
         round(self._current_sim_time * 1000))
     training_metric["start_time"] = int(round(self._start_time_ * 1000))
     training_metric["elapsed_time_in_milliseconds"] = int(
         round((self._current_sim_time - self._start_time_) * 1000))
     training_metric["episode"] = int(self._episode_)
     training_metric["trial"] = int(self._eval_trials_)
     training_metric[
         "phase"] = "evaluation" if self._is_eval_ else "training"
     training_metric["completion_percentage"] = int(self._progress_)
     training_metric[
         "episode_status"] = EpisodeStatus.get_episode_status_label(
             self._episode_status)
     self._metrics_.append(training_metric)
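
Both training variants above emit records of the same shape; a single appended
entry would look roughly like this, with illustrative values and an assumed
status label.

# Illustrative shape of a single training_metric entry (values made up):
example_training_metric = {
    'reward_score': 125,
    'metric_time': 83000,                  # simulation-clock ms
    'start_time': 61000,
    'elapsed_time_in_milliseconds': 22000,
    'episode': 40,                         # training episodes so far
    'trial': 5,                            # evaluation trials so far
    'phase': 'training',                   # 'evaluation' when _is_eval_ is set
    'completion_percentage': 87,
    'episode_status': 'Off track',         # assumed label text
}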
Example #7
 def append_episode_metrics(self, is_complete=True):
     if not is_complete and self._number_of_trials_ != 0:
         # Note: for virtual events, a racer who never finishes a lap for the
         # duration of the event is displayed as DNF. The game side still
         # wants the DNF ranks, so the incomplete metrics are appended for
         # people who did not finish their first lap; once at least one lap
         # is complete, further incomplete laps are skipped here.
         LOGGER.info(
             "Skipping metrics append for incomplete lap; laps completed: %s",
             self._number_of_trials_,
         )
         return
     eval_metric = dict()
     eval_metric["completion_percentage"] = int(self._progress_)
     eval_metric["metric_time"] = int(round(self._current_sim_time * 1000))
     eval_metric["start_time"] = int(round(self._start_time_ * 1000))
     eval_metric["elapsed_time_in_milliseconds"] = int(
         round((self._current_sim_time - self._start_time_) * 1000))
     eval_metric["episode_status"] = EpisodeStatus.get_episode_status_label(
         self._episode_status)
     eval_metric["crash_count"] = self.reset_count_dict[
         EpisodeStatus.CRASHED.value]
     eval_metric["immobilized_count"] = self.reset_count_dict[
         EpisodeStatus.IMMOBILIZED.value]
     eval_metric["off_track_count"] = self.reset_count_dict[
         EpisodeStatus.OFF_TRACK.value]
     eval_metric["reversed_count"] = self.reset_count_dict[
         EpisodeStatus.REVERSED.value]
     eval_metric["reset_count"] = (eval_metric["crash_count"] +
                                   eval_metric["immobilized_count"] +
                                   eval_metric["off_track_count"] +
                                   eval_metric["reversed_count"])
     if is_complete:
         self._number_of_trials_ += 1
         self._best_lap_time = min(
             eval_metric["elapsed_time_in_milliseconds"],
             self._best_lap_time)
         self._total_evaluation_time += eval_metric[
             "elapsed_time_in_milliseconds"]
     eval_metric["trial"] = int(self._number_of_trials_)
     self._metrics_.append(eval_metric)
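
The is_complete guard gives this variant three distinct paths. A standalone
restatement of just the guard, with asserts as usage; the helper name is
hypothetical.

def should_skip_append(is_complete, number_of_trials):
    '''Hypothetical helper mirroring the guard above: an incomplete lap is
    recorded only when no lap has been completed yet (the DNF case).'''
    return not is_complete and number_of_trials != 0

assert should_skip_append(True, 0) is False   # completed lap: always append
assert should_skip_append(False, 0) is False  # DNF with zero laps: append anyway
assert should_skip_append(False, 2) is True   # incomplete after a lap: skip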
Example #8
    def judge_action(self, action):
        try:
            # Get the position of the agent
            pos_dict = self._track_data_.get_agent_pos(
                self._agent_name_, self._agent_link_name_list_,
                const.RELATIVE_POSITION_OF_FRONT_OF_CAR)
            model_point = pos_dict[AgentPos.POINT.value]
            # Compute the next index
            current_ndist = self._track_data_.get_norm_dist(model_point)
            prev_index, next_index = self._track_data_.find_prev_next_waypoints(
                current_ndist, normalized=True, reverse_dir=self._reverse_dir_)
            # Set the basic reward and training metrics
            set_reward_and_metrics(self._reward_params_, self._step_metrics_,
                                   pos_dict, self._track_data_, next_index,
                                   prev_index, action, self._json_actions_)
            # Convert current progress to be [0,100] starting at the initial waypoint
            if self._reverse_dir_:
                self._reward_params_[const.RewardParam.LEFT_CENT.value[0]] = \
                    not self._reward_params_[const.RewardParam.LEFT_CENT.value[0]]
                current_progress = self._data_dict_[
                    'start_ndist'] - current_ndist
            else:
                current_progress = current_ndist - self._data_dict_[
                    'start_ndist']

            current_progress = compute_current_prog(
                current_progress, self._data_dict_['prev_progress'])
            self._data_dict_['steps'] += 1
            # Add the agent-specific metrics
            self._step_metrics_[StepMetrics.STEPS.value] = \
                self._reward_params_[const.RewardParam.STEPS.value[0]] = \
                self._data_dict_['steps']
            self._reward_params_[
                const.RewardParam.REVERSE.value[0]] = self._reverse_dir_
            self._step_metrics_[StepMetrics.PROG.value] = \
                self._reward_params_[const.RewardParam.PROG.value[0]] = \
                current_progress
        except Exception as ex:
            raise GenericRolloutException(
                'Cannot find position: {}'.format(ex))

        # This code should be replaced with the contact sensor code
        is_crashed = False
        model_heading = self._reward_params_[
            const.RewardParam.HEADING.value[0]]

        obstacle_reward_params = self._track_data_.get_object_reward_params(
            model_point, model_heading, current_progress, self._reverse_dir_)
        if obstacle_reward_params:
            self._reward_params_.update(obstacle_reward_params)
            is_crashed = self._track_data_.is_racecar_collided(
                pos_dict[AgentPos.LINK_POINTS.value])

        prev_pnt_dist = min(
            model_point.distance(self._prev_waypoints_['prev_point']),
            model_point.distance(self._prev_waypoints_['prev_point_2']))

        is_off_track = not any(
            self._track_data_.points_on_track(
                pos_dict[AgentPos.LINK_POINTS.value]))
        is_immobilized = (prev_pnt_dist <= 0.0001
                          and self._data_dict_['steps'] %
                          const.NUM_STEPS_TO_CHECK_STUCK == 0)
        is_lap_complete = current_progress >= 100.0

        self._reward_params_[const.RewardParam.CRASHED.value[0]] = is_crashed
        self._reward_params_[
            const.RewardParam.OFFTRACK.value[0]] = is_off_track

        done = is_crashed or is_immobilized or is_off_track or is_lap_complete
        episode_status = EpisodeStatus.get_episode_status(
            is_crashed=is_crashed,
            is_immobilized=is_immobilized,
            is_off_track=is_off_track,
            is_lap_complete=is_lap_complete)
        try:
            reward = float(self._reward_(copy.deepcopy(self._reward_params_)))
        except Exception as ex:
            raise RewardFunctionError(
                'Reward function exception {}'.format(ex))
        if math.isnan(reward) or math.isinf(reward):
            raise RewardFunctionError('{} returned as reward'.format(reward))

        self._prev_waypoints_['prev_point_2'] = self._prev_waypoints_[
            'prev_point']
        self._prev_waypoints_['prev_point'] = model_point
        self._data_dict_['prev_progress'] = current_progress
        # Record the remaining step metrics
        self._step_metrics_[StepMetrics.REWARD.value] = reward
        self._step_metrics_[StepMetrics.DONE.value] = done
        self._step_metrics_[StepMetrics.TIME.value] = time.time()
        self._step_metrics_[
            StepMetrics.EPISODE_STATUS.value] = episode_status.value

        return reward, done, self._step_metrics_
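
Of the four termination predicates in judge_action, the immobilization test is
the least obvious: the car counts as stuck only when it sits within 1e-4 of
both previously recorded points, and the check fires only on steps that are
multiples of NUM_STEPS_TO_CHECK_STUCK. A standalone restatement, with the
constant's value assumed rather than taken from const:

from shapely.geometry import Point

NUM_STEPS_TO_CHECK_STUCK = 15  # assumed value for illustration

def is_immobilized_sketch(model_point, prev_point, prev_point_2, steps):
    '''Standalone restatement of the stuck check in judge_action: stuck means
    within 1e-4 of both recent reference points, sampled every N steps.'''
    prev_pnt_dist = min(model_point.distance(prev_point),
                        model_point.distance(prev_point_2))
    return prev_pnt_dist <= 0.0001 and steps % NUM_STEPS_TO_CHECK_STUCK == 0

# Barely moved on a check step, so the car counts as immobilized:
print(is_immobilized_sketch(Point(0, 0), Point(0, 5e-5), Point(0, 1e-4), 30))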