def __init__(self, agent_name, s3_dict_metrics, deepracer_checkpoint_json, ckpnt_dir,
             run_phase_sink, use_model_picker=True):
    '''Set up the training metrics state, the simtrace directory and the video metrics service

    Args:
        agent_name (string): agent name, used for the simtrace path and the ROS service name
        s3_dict_metrics (dict): Dictionary containing the required s3 info for the metrics
                                bucket with keys specified by MetricsS3Keys
        deepracer_checkpoint_json: DeepracerCheckpointJson instance
        ckpnt_dir (string): Directory where the current checkpoint is to be stored
        run_phase_sink: Sink to receive notification of a change in run phase
        use_model_picker (bool): Flag to whether to use model picker or not.
    '''
    self._agent_name_ = agent_name
    self._deepracer_checkpoint_json = deepracer_checkpoint_json
    self._s3_metrics = Metrics(
        bucket=s3_dict_metrics[MetricsS3Keys.METRICS_BUCKET.value],
        s3_key=s3_dict_metrics[MetricsS3Keys.METRICS_KEY.value],
        region_name=s3_dict_metrics[MetricsS3Keys.REGION.value],
        s3_endpoint_url=s3_dict_metrics[MetricsS3Keys.ENDPOINT_URL.value])
    self._start_time_ = time.time()
    self._episode_ = 0
    self._episode_reward_ = 0.0
    self._progress_ = 0.0
    self._episode_status = ''
    self._metrics_ = list()
    self._is_eval_ = True
    self._eval_trials_ = 0
    self._checkpoint_state_ = CheckpointStateFile(ckpnt_dir)
    self._use_model_picker = use_model_picker
    self._eval_stats_dict_ = {'chkpnt_name': None, 'avg_comp_pct': -1.0}
    self._best_chkpnt_stats = {
        'name': None,
        'avg_comp_pct': -1.0,
        'time_stamp': time.time()
    }
    self._current_eval_pct_list_ = list()
    self.is_save_simtrace_enabled = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    self.track_data = TrackData.get_instance()
    run_phase_sink.register(self)
    # Create the agent specific directories needed for storing the metric files.
    # An empty dirname means the path has no directory component, so there is
    # nothing to create; exist_ok keeps an already existing directory from raising.
    self._simtrace_local_path = SIMTRACE_TRAINING_LOCAL_PATH_FORMAT.format(
        self._agent_name_)
    simtrace_dirname = os.path.dirname(self._simtrace_local_path)
    if simtrace_dirname:
        os.makedirs(simtrace_dirname, exist_ok=True)
    self._current_sim_time = 0
    # Assign the video metrics before advertising the service so that the
    # handler can never run against an instance missing this attribute.
    self._video_metrics = Mp4VideoMetrics.get_empty_dict()
    rospy.Service("/{}/{}".format(self._agent_name_, "mp4_video_metrics"),
                  VideoMetricsSrv, self._handle_get_video_metrics)
    AbstractTracker.__init__(self, TrackerPriority.HIGH)
def __init__(self, agent_name, s3_dict_metrics, is_continuous):
    '''Init eval metrics

    Args:
        agent_name (string): agent name
        s3_dict_metrics (dict): Dictionary containing the required s3 info for the metrics
                                bucket with keys specified by MetricsS3Keys
        is_continuous (bool): True if continuous race, False otherwise
    '''
    self._agent_name_ = agent_name
    self._s3_metrics = Metrics(
        bucket=s3_dict_metrics[MetricsS3Keys.METRICS_BUCKET.value],
        s3_key=s3_dict_metrics[MetricsS3Keys.METRICS_KEY.value],
        region_name=s3_dict_metrics[MetricsS3Keys.REGION.value],
        s3_endpoint_url=s3_dict_metrics[MetricsS3Keys.ENDPOINT_URL.value])
    self._is_continuous = is_continuous
    self._start_time_ = time.time()
    self._number_of_trials_ = 0
    self._progress_ = 0.0
    self._episode_status = ''
    self._metrics_ = list()
    # This is used to calculate the actual distance traveled by the car
    self._agent_xy = list()
    self._prev_step_time = time.time()
    self.is_save_simtrace_enabled = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    # Create the agent specific directories needed for storing the metric files.
    # An empty dirname means the path has no directory component, so there is
    # nothing to create; exist_ok keeps an already existing directory from raising.
    self._simtrace_local_path = SIMTRACE_EVAL_LOCAL_PATH_FORMAT.format(
        self._agent_name_)
    simtrace_dirname = os.path.dirname(self._simtrace_local_path)
    if simtrace_dirname:
        os.makedirs(simtrace_dirname, exist_ok=True)
    # Per-lap counters, keyed by the episode status value that caused the reset
    self.reset_count_dict = {
        EpisodeStatus.CRASHED.value: 0,
        EpisodeStatus.OFF_TRACK.value: 0,
        EpisodeStatus.IMMOBILIZED.value: 0,
        EpisodeStatus.REVERSED.value: 0
    }
    self._best_lap_time = float('inf')
    self._total_evaluation_time = 0
    self._video_metrics = Mp4VideoMetrics.get_empty_dict()
    self._reset_count_sum = 0
    self._current_sim_time = 0
    self.track_data = TrackData.get_instance()
    rospy.Service("/{}/{}".format(self._agent_name_, "mp4_video_metrics"),
                  VideoMetricsSrv, self._handle_get_video_metrics)
    AbstractTracker.__init__(self, TrackerPriority.HIGH)
class TrainingMetrics(MetricsInterface, ObserverInterface, AbstractTracker):
    '''This class is responsible for uploading training metrics to s3'''

    def __init__(self, agent_name, s3_dict_metrics, deepracer_checkpoint_json, ckpnt_dir,
                 run_phase_sink, use_model_picker=True):
        '''Set up the training metrics state, the simtrace directory and the video metrics service

        Args:
            agent_name (string): agent name, used for the simtrace path and the ROS service name
            s3_dict_metrics (dict): Dictionary containing the required s3 info for the metrics
                                    bucket with keys specified by MetricsS3Keys
            deepracer_checkpoint_json: DeepracerCheckpointJson instance
            ckpnt_dir (string): Directory where the current checkpoint is to be stored
            run_phase_sink: Sink to receive notification of a change in run phase
            use_model_picker (bool): Flag to whether to use model picker or not.
        '''
        self._agent_name_ = agent_name
        self._deepracer_checkpoint_json = deepracer_checkpoint_json
        self._s3_metrics = Metrics(
            bucket=s3_dict_metrics[MetricsS3Keys.METRICS_BUCKET.value],
            s3_key=s3_dict_metrics[MetricsS3Keys.METRICS_KEY.value],
            region_name=s3_dict_metrics[MetricsS3Keys.REGION.value])
        self._start_time_ = time.time()
        self._episode_ = 0
        self._episode_reward_ = 0.0
        self._progress_ = 0.0
        self._episode_status = ''
        self._metrics_ = list()
        self._is_eval_ = True
        self._eval_trials_ = 0
        self._checkpoint_state_ = CheckpointStateFile(ckpnt_dir)
        self._use_model_picker = use_model_picker
        self._eval_stats_dict_ = {'chkpnt_name': None, 'avg_comp_pct': -1.0}
        self._best_chkpnt_stats = {
            'name': None,
            'avg_comp_pct': -1.0,
            'time_stamp': time.time()
        }
        self._current_eval_pct_list_ = list()
        self.is_save_simtrace_enabled = rospy.get_param('SIMTRACE_S3_BUCKET', None)
        self.track_data = TrackData.get_instance()
        run_phase_sink.register(self)
        # Create the agent specific directories needed for storing the metric files.
        # An empty dirname means the path has no directory component, so there is
        # nothing to create; exist_ok keeps an already existing directory from raising.
        self._simtrace_local_path = SIMTRACE_TRAINING_LOCAL_PATH_FORMAT.format(
            self._agent_name_)
        simtrace_dirname = os.path.dirname(self._simtrace_local_path)
        if simtrace_dirname:
            os.makedirs(simtrace_dirname, exist_ok=True)
        self._current_sim_time = 0
        # Assign the video metrics before advertising the service so that the
        # handler can never run against an instance missing this attribute.
        self._video_metrics = Mp4VideoMetrics.get_empty_dict()
        rospy.Service("/{}/{}".format(self._agent_name_, "mp4_video_metrics"),
                      VideoMetricsSrv, self._handle_get_video_metrics)
        AbstractTracker.__init__(self, TrackerPriority.HIGH)

    def update_tracker(self, delta_time, sim_time):
        """
        Callback when sim time is updated

        Args:
            delta_time (float): time diff from last call
            sim_time (Clock): simulation time
        """
        # Fold the seconds / nanoseconds pair into a single float number of seconds
        self._current_sim_time = sim_time.clock.secs + 1.e-9 * sim_time.clock.nsecs

    def reset(self):
        '''Restart the per-episode accumulators, using the current sim time as the start time'''
        self._start_time_ = self._current_sim_time
        self._episode_reward_ = 0.0
        self._progress_ = 0.0

    def append_episode_metrics(self):
        '''Append a summary dict of the episode that just finished to the metrics list'''
        # NOTE(review): the trial counter is advanced under the same condition as the
        # episode counter (only outside evaluation) - confirm this is intended.
        if not self._is_eval_:
            self._episode_ += 1
            self._eval_trials_ += 1
        # Times are stored as integer milliseconds
        training_metric = dict()
        training_metric['reward_score'] = int(round(self._episode_reward_))
        training_metric['metric_time'] = int(round(self._current_sim_time * 1000))
        training_metric['start_time'] = int(round(self._start_time_ * 1000))
        training_metric['elapsed_time_in_milliseconds'] = \
            int(round((self._current_sim_time - self._start_time_) * 1000))
        training_metric['episode'] = int(self._episode_)
        training_metric['trial'] = int(self._eval_trials_)
        training_metric['phase'] = 'evaluation' if self._is_eval_ else 'training'
        training_metric['completion_percentage'] = int(self._progress_)
        training_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
            self._episode_status)
        self._metrics_.append(training_metric)

    def upload_episode_metrics(self):
        '''Persist all metrics collected so far as json and record the eval progress'''
        json_metrics = json.dumps({'metrics': self._metrics_})
        self._s3_metrics.persist(body=json_metrics,
                                 s3_kms_extra_args=get_s3_kms_extra_args())
        # Evaluation progresses are averaged in update() to pick the best checkpoint
        if self._is_eval_:
            self._current_eval_pct_list_.append(self._progress_)

    def upload_step_metrics(self, metrics):
        '''Record the metrics of a single step

        Args:
            metrics (dict): step metrics keyed by StepMetrics values; the episode
                            entry is filled in here while training
        '''
        self._progress_ = metrics[StepMetrics.PROG.value]
        self._episode_status = metrics[StepMetrics.EPISODE_STATUS.value]
        # Reward accumulation and sim trace logging only happen while training
        if not self._is_eval_:
            metrics[StepMetrics.EPISODE.value] = self._episode_
            self._episode_reward_ += metrics[StepMetrics.REWARD.value]
            StepMetrics.validate_dict(metrics)
            sim_trace_log(metrics)
            if self.is_save_simtrace_enabled:
                write_simtrace_to_local_file(self._simtrace_local_path, metrics)
        self._update_mp4_video_metrics(metrics)

    def update(self, data):
        '''Handle a run phase change and, when training starts, update the checkpoint stats

        Args:
            data: the new run phase; anything other than RunPhase.TRAIN is
                  treated as evaluation
        '''
        self._is_eval_ = data != RunPhase.TRAIN
        if self._is_eval_ or not self._use_model_picker:
            return
        if self._eval_stats_dict_['chkpnt_name'] is None:
            self._eval_stats_dict_['chkpnt_name'] = self._checkpoint_state_.read().name
        self._eval_trials_ = 0
        # With no evaluation recorded the mean falls back to 0.0
        mean_pct = statistics.mean(self._current_eval_pct_list_
                                   if self._current_eval_pct_list_ else [0.0])
        LOGGER.info(
            'Number of evaluations: {} Evaluation progresses: {}'.format(
                len(self._current_eval_pct_list_),
                self._current_eval_pct_list_))
        LOGGER.info('Evaluation progresses mean: {}'.format(mean_pct))
        self._current_eval_pct_list_.clear()

        time_stamp = self._current_sim_time
        # ">=" so that on a tie the more recent checkpoint becomes the best one
        if mean_pct >= self._eval_stats_dict_['avg_comp_pct']:
            LOGGER.info('Current mean: {} >= Current best mean: {}'.format(
                mean_pct, self._eval_stats_dict_['avg_comp_pct']))
            LOGGER.info(
                'Updating the best checkpoint to "{}" from "{}".'.format(
                    self._eval_stats_dict_['chkpnt_name'],
                    self._best_chkpnt_stats['name']))
            self._eval_stats_dict_['avg_comp_pct'] = mean_pct
            self._best_chkpnt_stats = {
                'name': self._eval_stats_dict_['chkpnt_name'],
                'avg_comp_pct': mean_pct,
                'time_stamp': time_stamp
            }
        last_chkpnt_stats = {
            'name': self._eval_stats_dict_['chkpnt_name'],
            'avg_comp_pct': mean_pct,
            'time_stamp': time_stamp
        }
        self._deepracer_checkpoint_json.persist(
            body=json.dumps({
                BEST_CHECKPOINT: self._best_chkpnt_stats,
                LAST_CHECKPOINT: last_chkpnt_stats
            }),
            s3_kms_extra_args=get_s3_kms_extra_args())
        # Update the checkpoint name to the new checkpoint being used for training that will
        # then be evaluated, note this class gets notified when the system is put into a
        # training phase and assumes that a training phase only starts when a new check point
        # is available
        self._eval_stats_dict_['chkpnt_name'] = self._checkpoint_state_.read().name

    def _update_mp4_video_metrics(self, metrics):
        '''Refresh the values served by the mp4 video metrics service

        Args:
            metrics (dict): step metrics keyed by StepMetrics values
        '''
        agent_x, agent_y = metrics[StepMetrics.X.value], metrics[StepMetrics.Y.value]
        self._video_metrics[Mp4VideoMetrics.LAP_COUNTER.value] = 0
        self._video_metrics[Mp4VideoMetrics.COMPLETION_PERCENTAGE.value] = self._progress_
        # Lap, reset, throttle, steering and lap time values are not tracked by
        # this class, so they are always reported as 0
        self._video_metrics[Mp4VideoMetrics.RESET_COUNTER.value] = 0
        self._video_metrics[Mp4VideoMetrics.THROTTLE.value] = 0
        self._video_metrics[Mp4VideoMetrics.STEERING.value] = 0
        self._video_metrics[Mp4VideoMetrics.BEST_LAP_TIME.value] = 0
        self._video_metrics[Mp4VideoMetrics.TOTAL_EVALUATION_TIME.value] = 0
        self._video_metrics[Mp4VideoMetrics.DONE.value] = metrics[StepMetrics.DONE.value]
        self._video_metrics[Mp4VideoMetrics.X.value] = agent_x
        self._video_metrics[Mp4VideoMetrics.Y.value] = agent_y
        # Every tracked object whose name does not start with 'racecar' is
        # reported as a point with z forced to 0
        object_locations = []
        for object_name, pose in self.track_data.object_poses.items():
            if object_name.startswith('racecar'):
                continue
            point = Point32()
            point.x, point.y, point.z = pose.position.x, pose.position.y, 0
            object_locations.append(point)
        self._video_metrics[Mp4VideoMetrics.OBJECT_LOCATIONS.value] = object_locations

    def _handle_get_video_metrics(self, req):
        '''Service handler returning the latest video metrics

        Args:
            req: service request, not used

        Returns:
            VideoMetricsSrvResponse: built positionally from the current video metrics
        '''
        return VideoMetricsSrvResponse(
            self._video_metrics[Mp4VideoMetrics.LAP_COUNTER.value],
            self._video_metrics[Mp4VideoMetrics.COMPLETION_PERCENTAGE.value],
            self._video_metrics[Mp4VideoMetrics.RESET_COUNTER.value],
            self._video_metrics[Mp4VideoMetrics.THROTTLE.value],
            self._video_metrics[Mp4VideoMetrics.STEERING.value],
            self._video_metrics[Mp4VideoMetrics.BEST_LAP_TIME.value],
            self._video_metrics[Mp4VideoMetrics.TOTAL_EVALUATION_TIME.value],
            self._video_metrics[Mp4VideoMetrics.DONE.value],
            self._video_metrics[Mp4VideoMetrics.X.value],
            self._video_metrics[Mp4VideoMetrics.Y.value],
            self._video_metrics[Mp4VideoMetrics.OBJECT_LOCATIONS.value])
class EvalMetrics(MetricsInterface, AbstractTracker):
    '''This class is responsible for uploading eval metrics to s3'''

    def __init__(self, agent_name, s3_dict_metrics, is_continuous):
        '''Init eval metrics

        Args:
            agent_name (string): agent name
            s3_dict_metrics (dict): Dictionary containing the required s3 info for the metrics
                                    bucket with keys specified by MetricsS3Keys
            is_continuous (bool): True if continuous race, False otherwise
        '''
        self._agent_name_ = agent_name
        self._s3_metrics = Metrics(
            bucket=s3_dict_metrics[MetricsS3Keys.METRICS_BUCKET.value],
            s3_key=s3_dict_metrics[MetricsS3Keys.METRICS_KEY.value],
            region_name=s3_dict_metrics[MetricsS3Keys.REGION.value])
        self._is_continuous = is_continuous
        self._start_time_ = time.time()
        self._number_of_trials_ = 0
        self._progress_ = 0.0
        self._episode_status = ''
        self._metrics_ = list()
        # This is used to calculate the actual distance traveled by the car
        self._agent_xy = list()
        self._prev_step_time = time.time()
        self.is_save_simtrace_enabled = rospy.get_param('SIMTRACE_S3_BUCKET', None)
        # Create the agent specific directories needed for storing the metric files.
        # An empty dirname means the path has no directory component, so there is
        # nothing to create; exist_ok keeps an already existing directory from raising.
        self._simtrace_local_path = SIMTRACE_EVAL_LOCAL_PATH_FORMAT.format(
            self._agent_name_)
        simtrace_dirname = os.path.dirname(self._simtrace_local_path)
        if simtrace_dirname:
            os.makedirs(simtrace_dirname, exist_ok=True)
        # Per-lap counters, keyed by the episode status value that caused the reset
        self.reset_count_dict = {
            EpisodeStatus.CRASHED.value: 0,
            EpisodeStatus.OFF_TRACK.value: 0,
            EpisodeStatus.IMMOBILIZED.value: 0,
            EpisodeStatus.REVERSED.value: 0
        }
        self._best_lap_time = float('inf')
        self._total_evaluation_time = 0
        self._video_metrics = Mp4VideoMetrics.get_empty_dict()
        self._reset_count_sum = 0
        self._current_sim_time = 0
        self.track_data = TrackData.get_instance()
        rospy.Service("/{}/{}".format(self._agent_name_, "mp4_video_metrics"),
                      VideoMetricsSrv, self._handle_get_video_metrics)
        AbstractTracker.__init__(self, TrackerPriority.HIGH)

    def update_tracker(self, delta_time, sim_time):
        """
        Callback when sim time is updated

        Args:
            delta_time (float): time diff from last call
            sim_time (Clock): simulation time
        """
        # Fold the seconds / nanoseconds pair into a single float number of seconds
        self._current_sim_time = sim_time.clock.secs + 1.e-9 * sim_time.clock.nsecs

    def reset(self):
        '''Start a new lap: restart the timer and roll the lap reset counts into the total'''
        self._start_time_ = self._current_sim_time
        self._reset_count_sum += \
            self.reset_count_dict[EpisodeStatus.CRASHED.value] + \
            self.reset_count_dict[EpisodeStatus.IMMOBILIZED.value] + \
            self.reset_count_dict[EpisodeStatus.OFF_TRACK.value] + \
            self.reset_count_dict[EpisodeStatus.REVERSED.value]
        # Only values are replaced, so iterating the dict while assigning is safe
        for key in self.reset_count_dict:
            self.reset_count_dict[key] = 0

    def append_episode_metrics(self):
        '''Append a summary dict of the trial that just finished to the metrics list'''
        self._number_of_trials_ += 1
        # Times are stored as integer milliseconds
        eval_metric = dict()
        eval_metric['completion_percentage'] = int(self._progress_)
        eval_metric['metric_time'] = int(round(self._current_sim_time * 1000))
        eval_metric['start_time'] = int(round(self._start_time_ * 1000))
        eval_metric['elapsed_time_in_milliseconds'] = \
            int(round((self._current_sim_time - self._start_time_) * 1000))
        eval_metric['trial'] = int(self._number_of_trials_)
        eval_metric['episode_status'] = EpisodeStatus.get_episode_status_label(
            self._episode_status)
        eval_metric['crash_count'] = self.reset_count_dict[EpisodeStatus.CRASHED.value]
        eval_metric['immobilized_count'] = self.reset_count_dict[
            EpisodeStatus.IMMOBILIZED.value]
        eval_metric['off_track_count'] = self.reset_count_dict[EpisodeStatus.OFF_TRACK.value]
        eval_metric['reversed_count'] = self.reset_count_dict[EpisodeStatus.REVERSED.value]
        eval_metric['reset_count'] = eval_metric['crash_count'] + \
            eval_metric['immobilized_count'] + \
            eval_metric['off_track_count'] + \
            eval_metric['reversed_count']
        # Lap time bookkeeping consumed by the mp4 video metrics
        self._best_lap_time = min(eval_metric['elapsed_time_in_milliseconds'],
                                  self._best_lap_time)
        self._total_evaluation_time += eval_metric['elapsed_time_in_milliseconds']
        self._metrics_.append(eval_metric)

    def upload_episode_metrics(self):
        '''Persist all metrics collected so far as json'''
        json_metrics = json.dumps({'metrics': self._metrics_})
        self._s3_metrics.persist(body=json_metrics,
                                 s3_kms_extra_args=get_s3_kms_extra_args())

    def _update_mp4_video_metrics(self, metrics):
        '''Refresh the values served by the mp4 video metrics service

        Args:
            metrics (dict): step metrics keyed by StepMetrics values
        '''
        actual_speed = 0
        cur_time = self._current_sim_time
        agent_x, agent_y = metrics[StepMetrics.X.value], metrics[StepMetrics.Y.value]
        # The speed needs a previous position, so it stays 0 on the first step
        # and whenever no sim time has elapsed since the previous step
        if self._agent_xy:
            # Speed = Distance/Time
            delta_time = cur_time - self._prev_step_time
            if delta_time:
                actual_speed = math.sqrt(
                    (self._agent_xy[0] - agent_x)**2 +
                    (self._agent_xy[1] - agent_y)**2) / delta_time
        self._agent_xy = [agent_x, agent_y]
        self._prev_step_time = cur_time

        self._video_metrics[Mp4VideoMetrics.LAP_COUNTER.value] = self._number_of_trials_
        self._video_metrics[Mp4VideoMetrics.COMPLETION_PERCENTAGE.value] = self._progress_
        # For continuous race, MP4 video will display the total reset counter for the entire race
        # For non-continuous race, MP4 video will display reset counter per lap
        self._video_metrics[Mp4VideoMetrics.RESET_COUNTER.value] = \
            self.reset_count_dict[EpisodeStatus.CRASHED.value] + \
            self.reset_count_dict[EpisodeStatus.IMMOBILIZED.value] + \
            self.reset_count_dict[EpisodeStatus.OFF_TRACK.value] + \
            self.reset_count_dict[EpisodeStatus.REVERSED.value] + \
            (self._reset_count_sum if self._is_continuous else 0)
        self._video_metrics[Mp4VideoMetrics.THROTTLE.value] = actual_speed
        self._video_metrics[Mp4VideoMetrics.STEERING.value] = metrics[
            StepMetrics.STEER.value]
        self._video_metrics[Mp4VideoMetrics.BEST_LAP_TIME.value] = self._best_lap_time
        # Finished laps plus the time elapsed in the current lap, in milliseconds
        self._video_metrics[Mp4VideoMetrics.TOTAL_EVALUATION_TIME.value] = \
            self._total_evaluation_time + \
            int(round((self._current_sim_time - self._start_time_) * 1000))
        self._video_metrics[Mp4VideoMetrics.DONE.value] = metrics[StepMetrics.DONE.value]
        self._video_metrics[Mp4VideoMetrics.X.value] = agent_x
        self._video_metrics[Mp4VideoMetrics.Y.value] = agent_y
        # Every tracked object whose name does not start with 'racecar' is
        # reported as a point with z forced to 0
        object_locations = []
        for object_name, pose in self.track_data.object_poses.items():
            if object_name.startswith('racecar'):
                continue
            point = Point32()
            point.x, point.y, point.z = pose.position.x, pose.position.y, 0
            object_locations.append(point)
        self._video_metrics[Mp4VideoMetrics.OBJECT_LOCATIONS.value] = object_locations

    def upload_step_metrics(self, metrics):
        '''Record the metrics of a single step

        Args:
            metrics (dict): step metrics keyed by StepMetrics values; the episode
                            entry is overwritten with the current trial count
        '''
        metrics[StepMetrics.EPISODE.value] = self._number_of_trials_
        self._progress_ = metrics[StepMetrics.PROG.value]
        self._episode_status = metrics[StepMetrics.EPISODE_STATUS.value]
        # Only the statuses present in reset_count_dict are counted as resets
        if self._episode_status in self.reset_count_dict:
            self.reset_count_dict[self._episode_status] += 1
        StepMetrics.validate_dict(metrics)
        sim_trace_log(metrics)
        if self.is_save_simtrace_enabled:
            write_simtrace_to_local_file(self._simtrace_local_path, metrics)
        self._update_mp4_video_metrics(metrics)

    def _handle_get_video_metrics(self, req):
        '''Service handler returning the latest video metrics

        Args:
            req: service request, not used

        Returns:
            VideoMetricsSrvResponse: built positionally from the current video metrics
        '''
        return VideoMetricsSrvResponse(
            self._video_metrics[Mp4VideoMetrics.LAP_COUNTER.value],
            self._video_metrics[Mp4VideoMetrics.COMPLETION_PERCENTAGE.value],
            self._video_metrics[Mp4VideoMetrics.RESET_COUNTER.value],
            self._video_metrics[Mp4VideoMetrics.THROTTLE.value],
            self._video_metrics[Mp4VideoMetrics.STEERING.value],
            self._video_metrics[Mp4VideoMetrics.BEST_LAP_TIME.value],
            self._video_metrics[Mp4VideoMetrics.TOTAL_EVALUATION_TIME.value],
            self._video_metrics[Mp4VideoMetrics.DONE.value],
            self._video_metrics[Mp4VideoMetrics.X.value],
            self._video_metrics[Mp4VideoMetrics.Y.value],
            self._video_metrics[Mp4VideoMetrics.OBJECT_LOCATIONS.value])