def _log(self): logger.info('[{}] (steps, loss) -> TRAIN: ({}, {}), LATENT: ({} {}), HOLDOUT: ({}, {})' .format(self._current_step, self._current_train_step, self._current_train_loss, self._current_latent_step, self._current_latent_loss, self._current_holdout_step, self._current_holdout_loss) )
def _restore_latest_checkpoint(self): # restore checkpoint latest_ckpt_fname = tf.train.latest_checkpoint( self._file_manager.ckpts_dir) if latest_ckpt_fname: logger.info('Restoring ckpt {0}'.format(latest_ckpt_fname)) self._checkpointer.restore(latest_ckpt_fname) logger.info('Starting training from step = {0}'.format( self._get_current_step()))
def reset_model(self): if self._use_random_goal: self.ac_goal_pos = np.random.uniform(self.ob_lb, self.ob_ub, size=(self.x_dim, )) else: self.ac_goal_pos = np.copy(INITIAL_GOAL_POS) logger.info("[SanityEnv] Resetting..\n\n") self._prev_obs = np.tile(self._obs[None], (self._obs_hist_len, 1)) self._prev_act = np.zeros((self._act_hist_len, self.u_dim)) return self.get_obs(), self.get_goal()
def reset(self): if self._servant is None: self._servant = ServantSigController(self._ros_prefix) logger.info("[CF]: Resetting.") self._servant.reset_sig() # self.servant.send_sig(STOP) # acknowledges we received the stop (or triggers a data collection stop) logger.info( "[CF]: Waiting for Master Controller start before continuing") # now we wait for a start before continuing self._servant.wait_sig(START, rate=rospy.Rate(10)) self._servant.reset_sig() self.collided = False self._ext_stop = False self._first_step = True return self.reset_model(), self.get_goal()
def _step(self, u, no_rew_pub=False, step_extra_func=None): if self._first_step: self._servant.send_sig(START) # acknowledges we received the start self._first_step = False super()._step(u, no_rew_pub=no_rew_pub, step_extra_func=step_extra_func) # externally triggered stop, master controller waits for ack if self._servant.sender_stopped(): logger.info("[CF] STOPPED EXTERNALLY.") self.done = True if self._ext_stop: logger.info("[CF] STOPPED INTERNALLY.") self.done = True if self.done: self._servant.send_sig(STOP) return self.get_obs(), self.get_goal(), self._done
def reset(self): # this will take off the model if online self._copter.reset() # we need to wait til the observation is nonzero if not self._use_data_capture and not self._copter.offline: # only wait if not in data capture mode logger.info("[TELLO CTRL] Waiting for target to be in frame...") while not self.target_in_frame(): pass logger.info("[TELLO CTRL] Target is in frame, press enter to start the rollout: ") input() logger.info("[TELLO CTRL] Starting the rollout...") return self.reset_model(), self.get_goal()
def _log(self): logger.info('') logger.info('Step {0}'.format(self._get_current_step() - 1)) with tf.contrib.summary.always_record_summaries(): for key, value in sorted(self._tb_logger.items(), key=lambda kv: kv[0]): logger.info('{0} {1:.6f}'.format(key, np.mean(value))) tf.contrib.summary.scalar(key, np.mean(value)) self._tb_logger.clear() for line in str(timeit).split('\n'): logger.debug(line) timeit.reset()
def reset(self): self._running = True # stops background thread logger.info("[CF]: Resetting.") # means we ended on a collision if not self.offline and self.collided: # ensure we are on the ground self._set_command(0) logger.info("[CF]: Estop CMD Sent.") time.sleep(1) # block if we are waiting for takeoff readiness if not self.is_takeoff_ready(): logger.info("[CF]: Takeoff is not ready. Press Enter to Continue:") input() # takeoff self._set_command(2) logger.info("[CF]: Takeoff CMD Sent.") time.sleep(3) self.collided = False self._running = False # starts background thread to keep steady while waiting for commands return self.reset_model(), self.get_goal()
def __init__(self, params, env_spec): env_spec.assert_has_names(['obs', 'act', 'prev_obs', 'prev_act']) self._env_spec = env_spec self.x_dim = self._env_spec.names_to_shapes.obs[-1] # dimension of obs self.u_dim = self._env_spec.names_to_shapes.act[-1] # dimension of act self._obs_hist_len = self._env_spec.names_to_shapes.prev_obs[-2] self._act_hist_len = self._env_spec.names_to_shapes.prev_act[-2] # Setup the parameters self._dt = params.dt self._ros_prefix = params.ros_prefix self.offline = params.offline # running offline means don't publish anything (e.g. from rosbag) self.horizon = int(params.horizon) self._normalize = params.normalize if self._normalize: logger.debug("Normalizing input ON") self._coef_cx = 1. / params.img_width self._coef_cy = 1. / params.img_height self._coef_area = 1. / (params.img_width * params.img_height) else: self._coef_cx = self._coef_cy = self._coef_area = 1. self.time_last_step = -1 self._obs = np.zeros((self.x_dim,)) self._next_obs = np.zeros((self.x_dim,)) self._prev_obs = None self._prev_act = None self._ob_lb, self._ob_ub = self._env_spec.limits(['obs']) self._ob_lb, self._ob_ub = self._ob_lb[0], self._ob_ub[0] self.observation_space = spaces.Box(self._ob_lb, self._ob_ub) self._initial_goal_pos = params.initial_goal_pos self._curr_goal_pos = None self._cost = 0 self._done = False # indicates if the copter has completed/failed its rollout self._running = False # indicates if a rollout is currently running # start with an assumption of collision to allow for takeoff self.collided = True self._prev_act = None self._ac_lb, self._ac_ub = self._env_spec.limits(['act']) self._ac_lb, self._ac_ub = self._ac_lb[0], self._ac_ub[0] self.action_space = spaces.Box(self._ac_lb, self._ac_ub) # ros callbacks self._curr_motion = CFMotion() self._ros_topics_and_types = { self._ros_prefix + 'coll': Bool, self._ros_prefix + 'data': CFData, 'extcam/target_vector': Vector3Stamped, } self._ros_msgs = dict() self._ros_msg_times = dict() # ROS setup ru.setup_ros_node('MBRLNode', anonymous=True) for topic, type in self._ros_topics_and_types.items(): ru.bind_subscriber(RosTopic(topic, type), self.ros_msg_update, callback_args=(topic,)) # does not use these publishers if _offline _, self._ros_motion_pub = ru.get_publisher(RosTopic(self._ros_prefix + "motion", CFMotion), queue_size=10) _, self._ros_command_pub = ru.get_publisher(RosTopic(self._ros_prefix + "command", CFCommand), queue_size=10) _, self._ros_goal_pub = ru.get_publisher(RosTopic("/mpc/goal_vector", Vector3Stamped), queue_size=10) _, self._ros_action_pub = ru.get_publisher(RosTopic("/mpc/action_vector", Vector3Stamped), queue_size=10) _, self._ros_reward_pub = ru.get_publisher(RosTopic("/mpc/reward_vector", Vector3Stamped), queue_size=10) _, self._ros_latent_pub = ru.get_publisher(RosTopic("/mpc/latent_vector", Vector3Stamped), queue_size=10) _, self._ros_trajectory_marker_pub = ru.get_publisher(RosTopic("/mpc/trajectory_marker", Marker), queue_size=500) _, self._ros_ac_marker_pub = ru.get_publisher(RosTopic("/mpc/action_marker", Marker), queue_size=10) logger.info("[COPTER]: Initialized, waiting for topic streams before starting...") while not self.ros_is_good(display=False): pass logger.info("[COPTER]: Topics are active.") self._step_rate = rospy.Rate(1. / self._dt) # used in step extra function self._rate = rospy.Rate(1. / self._dt) # used only in background thread # this background publisher thread is only when we are online if not self.offline: ru.setup_background_thread(self._bg_loop_fn, self._rate) ru.start_background_thread() self.reset()
def _log(self): logger.info('[Latent Training] Step: {}, Loss: {}, Steps/train (avg): {}' .format(self._current_latent_step, self._current_latent_loss, self._exp_avg_steps_per_train)) mu, lsig = self._model.get_online_latent_mu_logsig() logger.debug('[Latent Training] MU: %s, SIGMA: %s' % (to_numpy(mu).tolist(), to_numpy(lsig.exp()).tolist()))
def _save(self): logger.info('Saving model...') self._checkpointer.save(self._file_manager.ckpt_prefix)