def _check_for_episode_termination(self, reset_rules_status, agents_info_map):
    '''Check whether an episode should be terminated

    Args:
        reset_rules_status: dictionary of reset rules status with key as
                            reset rule names and value as reset rule bool status
        agents_info_map: dictionary of agents info map with key as agent name
                         and value as agent info

    Returns:
        tuple (string, bool, bool): episode status, pause flag, and done flag
    '''
    episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
    pause = False
    done = False
    # Note: check EPISODE_COMPLETE as the first item because the agent might
    # crash at the finish line.
    if EpisodeStatus.EPISODE_COMPLETE.value in reset_rules_status and \
            reset_rules_status[EpisodeStatus.EPISODE_COMPLETE.value]:
        done = True
        episode_status = EpisodeStatus.EPISODE_COMPLETE.value
    elif EpisodeStatus.CRASHED.value in reset_rules_status and \
            reset_rules_status[EpisodeStatus.CRASHED.value]:
        # Check crash with all other objects besides static obstacles
        crashed_object_name = agents_info_map[self._agent_name_][
            AgentInfo.CRASHED_OBJECT_NAME.value]
        if 'obstacle' not in crashed_object_name:
            current_progress = agents_info_map[self._agent_name_][
                AgentInfo.CURRENT_PROGRESS.value]
            crashed_object_progress = agents_info_map[crashed_object_name][
                AgentInfo.CURRENT_PROGRESS.value]
            if current_progress < crashed_object_progress:
                done, pause = self._check_for_phase_change()
        else:
            done, pause = self._check_for_phase_change()
    elif any(reset_rules_status.values()):
        done, pause = self._check_for_phase_change()
    return episode_status, pause, done
def append_episode_metrics(self):
    self._number_of_trials_ += 1
    eval_metric = dict()
    eval_metric['completion_percentage'] = int(self._progress_)
    eval_metric['metric_time'] = int(round(time.time() * 1000))
    eval_metric['start_time'] = int(round(self._start_time_ * 1000))
    eval_metric['elapsed_time_in_milliseconds'] = \
        int(round((time.time() - self._start_time_) * 1000))
    eval_metric['trial'] = int(self._number_of_trials_)
    eval_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
        self._episode_status)
    eval_metric['crash_count'] = self.reset_count_dict[
        EpisodeStatus.CRASHED.value]
    eval_metric['immobilized_count'] = self.reset_count_dict[
        EpisodeStatus.IMMOBILIZED.value]
    eval_metric['off_track_count'] = self.reset_count_dict[
        EpisodeStatus.OFF_TRACK.value]
    eval_metric['reversed_count'] = self.reset_count_dict[
        EpisodeStatus.REVERSED.value]
    eval_metric['reset_count'] = eval_metric['crash_count'] + \
        eval_metric['immobilized_count'] + \
        eval_metric['off_track_count'] + \
        eval_metric['reversed_count']
    self._best_lap_time = min(eval_metric['elapsed_time_in_milliseconds'],
                              self._best_lap_time)
    self._total_evaluation_time += eval_metric['elapsed_time_in_milliseconds']
    self._metrics_.append(eval_metric)
def _check_for_episode_termination(self, reset_rules_status, agents_info_map):
    '''Check whether an episode should be terminated

    Args:
        reset_rules_status: dictionary of reset rules status with key as
                            reset rule names and value as reset rule bool status
        agents_info_map: dictionary of agents info map with key as agent name
                         and value as agent info

    Returns:
        tuple (string, bool, bool): episode status, pause flag, and done flag
    '''
    episode_status = EpisodeStatus.get_episode_status(reset_rules_status)
    pause = False
    done = False
    # Note: check EPISODE_COMPLETE as the first item because the agent might
    # crash at the finish line.
    if EpisodeStatus.EPISODE_COMPLETE.value in reset_rules_status and \
            reset_rules_status[EpisodeStatus.EPISODE_COMPLETE.value]:
        done = True
        episode_status = EpisodeStatus.EPISODE_COMPLETE.value
    elif EpisodeStatus.CRASHED.value in reset_rules_status and \
            reset_rules_status[EpisodeStatus.CRASHED.value]:
        # Only check for crash when in the RUN phase
        if self._ctrl_status[AgentCtrlStatus.AGENT_PHASE.value] == \
                AgentPhase.RUN.value:
            self._curr_crashed_object_name = agents_info_map[
                self._agent_name_][AgentInfo.CRASHED_OBJECT_NAME.value]
            # Check crash with all other objects besides static obstacles
            if 'obstacle' not in self._curr_crashed_object_name:
                current_progress = agents_info_map[self._agent_name_][
                    AgentInfo.CURRENT_PROGRESS.value]
                crashed_obj_info = agents_info_map[
                    self._curr_crashed_object_name]
                crashed_obj_progress = crashed_obj_info[
                    AgentInfo.CURRENT_PROGRESS.value]
                crashed_obj_start_ndist = crashed_obj_info[
                    AgentInfo.START_NDIST.value]
                crashed_object_progress = get_normalized_progress(
                    crashed_obj_progress,
                    start_ndist=crashed_obj_start_ndist)
                current_progress = get_normalized_progress(
                    current_progress,
                    start_ndist=self._data_dict_['start_ndist'])
                if current_progress < crashed_object_progress:
                    done, pause = self._check_for_phase_change()
                else:
                    episode_status = EpisodeStatus.IN_PROGRESS.value
            else:
                done, pause = self._check_for_phase_change()
        else:
            pause = True
    elif any(reset_rules_status.values()):
        done, pause = self._check_for_phase_change()
    return episode_status, pause, done
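# A minimal sketch of the inputs the two _check_for_episode_termination
# variants above expect, for illustration only. The dictionary keys come from
# the EpisodeStatus / AgentInfo enums used in the methods; the agent names and
# numeric values are hypothetical.
example_reset_rules_status = {
    EpisodeStatus.EPISODE_COMPLETE.value: False,
    EpisodeStatus.CRASHED.value: True,
    EpisodeStatus.OFF_TRACK.value: False,
}
example_agents_info_map = {
    'agent': {
        AgentInfo.CRASHED_OBJECT_NAME.value: 'racecar_1',
        AgentInfo.CURRENT_PROGRESS.value: 42.0,
        AgentInfo.START_NDIST.value: 0.0,
    },
    'racecar_1': {
        AgentInfo.CURRENT_PROGRESS.value: 45.0,
        AgentInfo.START_NDIST.value: 0.25,
    },
}
# Hypothetical call on an agent-control instance:
# episode_status, pause, done = agent_ctrl._check_for_episode_termination(
#     example_reset_rules_status, example_agents_info_map)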
def append_episode_metrics(self):
    self._episode_ += 1 if not self._is_eval_ else 0
    self._eval_trials_ += 1 if not self._is_eval_ else 0
    training_metric = dict()
    training_metric['reward_score'] = int(round(self._episode_reward_))
    training_metric['metric_time'] = int(round(self._current_sim_time * 1000))
    training_metric['start_time'] = int(round(self._start_time_ * 1000))
    training_metric['elapsed_time_in_milliseconds'] = \
        int(round((self._current_sim_time - self._start_time_) * 1000))
    training_metric['episode'] = int(self._episode_)
    training_metric['trial'] = int(self._eval_trials_)
    training_metric['phase'] = 'evaluation' if self._is_eval_ else 'training'
    training_metric['completion_percentage'] = int(self._progress_)
    training_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
        self._episode_status)
    self._metrics_.append(training_metric)
def upload_episode_metrics(self):
    self._number_of_trials_ += 1
    eval_metric = dict()
    eval_metric['completion_percentage'] = int(self._progress_)
    eval_metric['metric_time'] = int(round(time.time() * 1000))
    eval_metric['start_time'] = int(round(self._start_time_ * 1000))
    eval_metric['elapsed_time_in_milliseconds'] = \
        int(round((time.time() - self._start_time_) * 1000))
    eval_metric['trial'] = int(self._number_of_trials_)
    eval_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
        self._episode_status)
    self._metrics_.append(eval_metric)
    write_metrics_to_s3(self._s3_dict_[MetricsS3Keys.METRICS_BUCKET.value],
                        self._s3_dict_[MetricsS3Keys.METRICS_KEY.value],
                        self._s3_dict_[MetricsS3Keys.REGION.value],
                        self._metrics_)
    self._simtrace_data_.upload_to_s3(self._number_of_trials_)
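# A hedged sketch of the S3 configuration mapping that upload_episode_metrics
# reads from. Only the three MetricsS3Keys entries used above are shown; the
# bucket name, object key, and region values are hypothetical.
example_s3_dict = {
    MetricsS3Keys.METRICS_BUCKET.value: 'my-deepracer-bucket',
    MetricsS3Keys.METRICS_KEY.value: 'metrics/eval_metrics.json',
    MetricsS3Keys.REGION.value: 'us-east-1',
}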
def append_episode_metrics(self):
    self._episode_ += 1 if not self._is_eval_ else 0
    self._eval_trials_ += 1 if not self._is_eval_ else 0
    training_metric = dict()
    training_metric["reward_score"] = int(round(self._episode_reward_))
    training_metric["metric_time"] = int(round(self._current_sim_time * 1000))
    training_metric["start_time"] = int(round(self._start_time_ * 1000))
    training_metric["elapsed_time_in_milliseconds"] = int(
        round((self._current_sim_time - self._start_time_) * 1000))
    training_metric["episode"] = int(self._episode_)
    training_metric["trial"] = int(self._eval_trials_)
    training_metric["phase"] = "evaluation" if self._is_eval_ else "training"
    training_metric["completion_percentage"] = int(self._progress_)
    training_metric["episode_status"] = EpisodeStatus.get_episode_status_label(
        self._episode_status)
    self._metrics_.append(training_metric)
def append_episode_metrics(self, is_complete=True):
    if not is_complete and self._number_of_trials_ != 0:
        # Note: for a virtual event, if the racer did not finish even one lap
        # for the duration of the event, we display DNF. However, the game
        # team wants the DNF ranks as well, so we append the incomplete
        # metrics for racers who didn't finish the first lap.
        LOGGER.info(
            "Appending episode metrics for incomplete lap skipped, laps completed %s",
            self._number_of_trials_)
        return
    eval_metric = dict()
    eval_metric["completion_percentage"] = int(self._progress_)
    eval_metric["metric_time"] = int(round(self._current_sim_time * 1000))
    eval_metric["start_time"] = int(round(self._start_time_ * 1000))
    eval_metric["elapsed_time_in_milliseconds"] = int(
        round((self._current_sim_time - self._start_time_) * 1000))
    eval_metric["episode_status"] = EpisodeStatus.get_episode_status_label(
        self._episode_status)
    eval_metric["crash_count"] = self.reset_count_dict[
        EpisodeStatus.CRASHED.value]
    eval_metric["immobilized_count"] = self.reset_count_dict[
        EpisodeStatus.IMMOBILIZED.value]
    eval_metric["off_track_count"] = self.reset_count_dict[
        EpisodeStatus.OFF_TRACK.value]
    eval_metric["reversed_count"] = self.reset_count_dict[
        EpisodeStatus.REVERSED.value]
    eval_metric["reset_count"] = (eval_metric["crash_count"] +
                                  eval_metric["immobilized_count"] +
                                  eval_metric["off_track_count"] +
                                  eval_metric["reversed_count"])
    if is_complete:
        self._number_of_trials_ += 1
        self._best_lap_time = min(
            eval_metric["elapsed_time_in_milliseconds"], self._best_lap_time)
        self._total_evaluation_time += eval_metric[
            "elapsed_time_in_milliseconds"]
    eval_metric["trial"] = int(self._number_of_trials_)
    self._metrics_.append(eval_metric)
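# Hypothetical call pattern for the virtual-event variant above: a normal lap
# completion appends a complete metric, while an event-end flush appends a
# DNF row only for racers with zero completed laps.
# metrics.append_episode_metrics()                    # completed lap
# metrics.append_episode_metrics(is_complete=False)   # event end / DNF check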
def judge_action(self, action):
    try:
        # Get the position of the agent
        pos_dict = self._track_data_.get_agent_pos(
            self._agent_name_, self._agent_link_name_list_,
            const.RELATIVE_POSITION_OF_FRONT_OF_CAR)
        model_point = pos_dict[AgentPos.POINT.value]
        # Compute the next index
        current_ndist = self._track_data_.get_norm_dist(model_point)
        prev_index, next_index = self._track_data_.find_prev_next_waypoints(
            current_ndist, normalized=True, reverse_dir=self._reverse_dir_)
        # Set the basic reward and training metrics
        set_reward_and_metrics(self._reward_params_, self._step_metrics_,
                               pos_dict, self._track_data_, next_index,
                               prev_index, action, self._json_actions_)
        # Convert current progress to be [0, 100] starting at the initial waypoint
        if self._reverse_dir_:
            self._reward_params_[const.RewardParam.LEFT_CENT.value[0]] = \
                not self._reward_params_[const.RewardParam.LEFT_CENT.value[0]]
            current_progress = self._data_dict_['start_ndist'] - current_ndist
        else:
            current_progress = current_ndist - self._data_dict_['start_ndist']
        current_progress = compute_current_prog(
            current_progress, self._data_dict_['prev_progress'])
        self._data_dict_['steps'] += 1
        # Add the agent-specific metrics
        self._step_metrics_[StepMetrics.STEPS.value] = \
            self._reward_params_[const.RewardParam.STEPS.value[0]] = \
            self._data_dict_['steps']
        self._reward_params_[const.RewardParam.REVERSE.value[0]] = \
            self._reverse_dir_
        self._step_metrics_[StepMetrics.PROG.value] = \
            self._reward_params_[const.RewardParam.PROG.value[0]] = \
            current_progress
    except Exception as ex:
        raise GenericRolloutException('Cannot find position: {}'.format(ex))
    # This code should be replaced with the contact sensor code
    is_crashed = False
    model_heading = self._reward_params_[const.RewardParam.HEADING.value[0]]
    obstacle_reward_params = self._track_data_.get_object_reward_params(
        model_point, model_heading, current_progress, self._reverse_dir_)
    if obstacle_reward_params:
        self._reward_params_.update(obstacle_reward_params)
        is_crashed = self._track_data_.is_racecar_collided(
            pos_dict[AgentPos.LINK_POINTS.value])
    prev_pnt_dist = min(
        model_point.distance(self._prev_waypoints_['prev_point']),
        model_point.distance(self._prev_waypoints_['prev_point_2']))
    is_off_track = not any(
        self._track_data_.points_on_track(
            pos_dict[AgentPos.LINK_POINTS.value]))
    is_immobilized = (prev_pnt_dist <= 0.0001 and
                      self._data_dict_['steps'] %
                      const.NUM_STEPS_TO_CHECK_STUCK == 0)
    is_lap_complete = current_progress >= 100.0
    self._reward_params_[const.RewardParam.CRASHED.value[0]] = is_crashed
    self._reward_params_[const.RewardParam.OFFTRACK.value[0]] = is_off_track
    done = is_crashed or is_immobilized or is_off_track or is_lap_complete
    episode_status = EpisodeStatus.get_episode_status(
        is_crashed=is_crashed,
        is_immobilized=is_immobilized,
        is_off_track=is_off_track,
        is_lap_complete=is_lap_complete)
    try:
        reward = float(self._reward_(copy.deepcopy(self._reward_params_)))
    except Exception as ex:
        raise RewardFunctionError('Reward function exception {}'.format(ex))
    if math.isnan(reward) or math.isinf(reward):
        raise RewardFunctionError('{} returned as reward'.format(reward))
    self._prev_waypoints_['prev_point_2'] = self._prev_waypoints_['prev_point']
    self._prev_waypoints_['prev_point'] = model_point
    self._data_dict_['prev_progress'] = current_progress
    # Get the last of the step metrics
    self._step_metrics_[StepMetrics.REWARD.value] = reward
    self._step_metrics_[StepMetrics.DONE.value] = done
    self._step_metrics_[StepMetrics.TIME.value] = time.time()
    self._step_metrics_[StepMetrics.EPISODE_STATUS.value] = \
        episode_status.value
    return reward, done, self._step_metrics_
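# A minimal sketch of how judge_action might be driven from a rollout loop.
# `agent`, `policy`, and `env` are hypothetical stand-ins, not part of the
# source above; the sketch relies only on the (reward, done, step_metrics)
# return signature shown in judge_action.
def run_episode(agent, policy, env, observation):
    total_reward = 0.0
    done = False
    step_metrics = None
    while not done:
        action = policy.sample(observation)  # hypothetical policy interface
        reward, done, step_metrics = agent.judge_action(action)
        total_reward += reward
        observation = env.step(action)       # hypothetical environment step
    return total_reward, step_metrics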