def _load_np(self): local_dict = AttrDict() if self._input_file is None: for key in self._all_names: local_dict[key] = [] split_indices = np.array([]) else: logger.debug('Loading ' + self._input_file) datadict = np.load(self._input_file, mmap_mode="r", allow_pickle=True) self._data_len = len(datadict['done']) split_indices = np.where( datadict['done'])[0] + 1 # one after each episode ends # remove the last chunk since it will be empty if 0 < self._data_len == split_indices[-1]: split_indices = np.delete(split_indices, -1) for key in self._all_names: assert key in datadict, f'{key} not in np file' assert len(datadict[key]) == self._data_len # split by episode local_dict[key] = np.split(datadict[key], split_indices) logger.debug('Dataset length: {}'.format(self._data_len)) return local_dict, split_indices
def __init__(self, params, env_spec, dataset_train): super(Model, self).__init__() self._env_spec = env_spec self._dataset_train = dataset_train if "cuda" not in str(params.device) or torch.cuda.is_available(): self._device = torch.device(params.device) else: self._device = torch.device("cpu") # TODO logger logger.debug("Model using device: {}".format(self._device)) keys = params.leaf_keys() # first thing called in model forward (inputs) -> new_inputs if 'preproc_fn' in keys: self.set_fn("_preproc_fn", params.preproc_fn, Callable[[AttrDict], AttrDict]) # mutation okay # updates model outputs (inputs, model_outputs) -> new_model_outputs if 'postproc_fn' in keys: self.set_fn("_postproc_fn", params.postproc_fn, Callable[[AttrDict, AttrDict], AttrDict]) # mutation okay # gets model loss (inputs, outputs, model_outputs) -> loss torch tensor if 'loss_fn' in keys: self.set_fn("_loss_fn", params.loss_fn, Callable[[AttrDict, AttrDict, AttrDict], torch.Tensor]) self._init_params_to_attrs(params) self._init_setup()
def restore_checkpoint(model, fname): logger.debug('Restoring model {0}'.format(fname)) assert tf.train.checkpoint_exists(fname) checkpointer = tf.train.Checkpoint(model=model) status = checkpointer.restore(fname) if not tf.executing_eagerly(): status.initialize_or_restore(tf.get_default_session())
def func(state, goal, i): nonlocal mask if mask is None: mask = np.ones(state.shape) abs_diff = np.abs(np.multiply(state, mask) - goal) logger.debug("[TEMP] offsets / within: " + str(abs_diff) + " / " + str(abs_diff <= thresh_array)) return np.all(abs_diff <= thresh_array)
def _step(self, u, no_rew_pub=False, step_extra_func=None, buffer_window_size=-1, **kwargs): self._enforce_dimension(self._obs, u) # Limit the control inputs u0 = np.clip(u, self.ac_lb, self.ac_ub) last_state = np.copy(self._obs) # update vx, vy, dz (does NOT publish) self._set_motion(u0[0], u0[1], u0[2]) vec = self._sim.step(self._curr_motion) self._prev_obs = advance_history_np(self._prev_obs[None], self._obs[None])[0] self._prev_act = advance_history_np(self._prev_act[None], u0[None])[0] self._obs[0] = vec.vector.x self._obs[1] = vec.vector.y self._obs[2] = vec.vector.z self.cost = self.get_cost() self.done = self.cost <= self.cost_tol if np.any(np.isnan(self._obs)): logger.debug('CF SANITY: NAN POSITION') logger.debug( "[SanityEnv]: lat=%d, AC: %s, OB: %s, NEXT_OB: %s, GOAL: %s, REW: %f" % (self._latent_idx, np.array2string(u0, separator=', '), np.array2string(last_state, separator=', '), np.array2string(self._obs, separator=', '), np.array2string(self.ac_goal_pos, separator=', '), -self.cost)) ## EXTRA FUNCTIONALITY for inferring latent online ret = None if step_extra_func is not None: def exit_condition(iters): return self._step_rate.remaining().to_sec() < 1e-7 \ and self._step_rate.sleep() is None ret = step_extra_func(exit_cond=exit_condition, buffer_window_size=buffer_window_size ) # returns at or before sleep_end_time mu = ret[0][0] logsig = ret[1][0] # TODO do something with these else: self._step_rate.sleep() # sleep until next iter return self.get_obs(), self.get_goal(), self.done
def _load_mat(self): local_dict = AttrDict() if self._input_file is None: local_dict = self._env_spec.get_zeros(self._all_names, 0) # np else: logger.debug('Loading ' + self._input_file) samples = scipy.io.loadmat(self._input_file) # split into chunks by episode. dict = {key: list of [Ni, key_shape]} data_dict = split_data_by_episodes(samples, horizon=self._planning_horizon, n_obs=self._obs_history_length, n_acs=self._acs_history_length) self._mu_obs = data_dict['mu_obs'] self._sigma_obs = data_dict['sigma_obs'] self._mu_delta_obs = data_dict['mu_delta_obs'] self._sigma_delta_obs = data_dict['sigma_delta_obs'] self._action_sequences = np.concatenate( data_dict['act_seq'], axis=0).astype(self._env_spec.names_to_dtypes['act']) split_indices = np.cumsum(data_dict['episode_sizes']) # remove the last chunk since it will be empty split_indices = np.delete(split_indices, -1) if self._split_indices.size > 0: self._split_indices = np.concatenate([ self._split_indices, np.array([self._data_len]), self._data_len + split_indices ], axis=0) else: self._split_indices = split_indices self._num_episodes += len(data_dict['done']) self._data_len += np.sum(data_dict['episode_sizes']) logger.debug('Dataset length: {}'.format(self._data_len)) for key in self._all_names: assert key in data_dict, f'{key} not in converted mat file' assert len(data_dict[key]) > 0 # turn list into np array with the correct type local_dict[key] = np.concatenate( data_dict[key], axis=0).astype(self._env_spec.names_to_dtypes[key]) assert local_dict[key].shape[1:] == self._env_spec.names_to_shapes[key], \ "Bad Data shape for {}: {}, requires {}" \ .format(key, local_dict[key].shape[1:], self._env_spec.names_to_shapes[key]) # print(key, self._env_spec.names_to_shapes[key], local_dict[key].shape) assert local_dict[key].shape[0] == self._data_len, \ "Bad datalen for {}: {}, requires {}".format(key, local_dict[key].shape, self._data_len) return local_dict
def _log(self): logger.info('') logger.info('Step {0}'.format(self._get_current_step() - 1)) with tf.contrib.summary.always_record_summaries(): for key, value in sorted(self._tb_logger.items(), key=lambda kv: kv[0]): logger.info('{0} {1:.6f}'.format(key, np.mean(value))) tf.contrib.summary.scalar(key, np.mean(value)) self._tb_logger.clear() for line in str(timeit).split('\n'): logger.debug(line) timeit.reset()
def _restore_checkpoint(self): # TODO path = os.path.join(self._file_manager.models_dir, self._checkpoint_model_file) if os.path.isfile(path): checkpoint = torch.load(str(path)) self._model.restore_from_checkpoint(checkpoint) self._current_step = checkpoint['step'] self._current_train_step = checkpoint['train_step'] self._current_holdout_step = checkpoint['holdout_step'] self._current_latent_step = checkpoint['latent_step'] self._current_train_loss = checkpoint['train_loss'] self._current_holdout_loss = checkpoint['holdout_loss'] self._current_latent_loss = checkpoint['latent_loss'] logger.debug("Loaded model, current train step: {}".format(self._current_step))
def reset(self, zero_at=None, new_iterator=None): if zero_at is not None: self.origin = zero_at if new_iterator is not None: self.iterator = new_iterator else: _, self.original, self.iterator = tee(self.original, 3) self.curr_goal = None self.completed = False self.i = 0 logger.debug("RESET AT: " + str(self.origin[:2]))
def eval_policy(dataset, save_file_name): b_size = dataset.batch_size d_size = len(dataset) obs_all = [] goals_all = [] output_actions = [] iters = math.ceil(d_size / b_size) for b in range(iters): logger.debug("[%d/%d]: Eval policy" % (b, iters)) idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size)) if args.random_goals: inputs, outputs = dataset.get_batch(indices=idxs, torch_device=model.device, get_horizon_goals=False) # this is to account for broadcasting to H+1 goals goals = env_spec.get_uniform( env_spec.goal_names, b_size, torch_device=model.device).unsqueeze(1) else: inputs, outputs, goals = dataset.get_batch( indices=idxs, torch_device=model.device, get_horizon_goals=True) # get obs batch obs = AttrDict() for name in env_spec.observation_names: obs[name] = inputs[name] act = policy.get_action(model, obs, goals, batch=True) goals_all.append(goals.leaf_apply(lambda v: to_numpy(v))) obs_all.append(obs.leaf_apply(lambda v: to_numpy(v))) output_actions.append(act.leaf_apply(lambda v: to_numpy(v))) # one big dictionary combined_obs = AttrDict.leaf_combine_and_apply( obs_all, lambda vs: np.concatenate(vs, axis=0)) combined_goals = AttrDict.leaf_combine_and_apply( goals_all, lambda vs: np.concatenate(vs, axis=0)) combined_output_actions = AttrDict.leaf_combine_and_apply( output_actions, lambda vs: np.concatenate(vs, axis=0)) combined_obs.combine(combined_goals) combined_obs.combine(combined_output_actions) logger.debug("Saving Action Sequences") savemat(save_file_name, combined_obs)
def _save(self): base_fname = "model.pt" if self._save_checkpoints: base_fname = "chkpt_{:07d}.pt".format(self._current_step) path = os.path.join(self._file_manager.models_dir, base_fname) torch.save({'step': self._current_step, 'train_step': self._current_train_step, 'holdout_step': self._current_holdout_step, 'latent_step': self._current_latent_step, 'train_loss': self._current_train_loss, 'holdout_loss': self._current_holdout_loss, 'latent_loss': self._current_latent_loss, 'model': self._model.state_dict()}, path) if self._save_checkpoints: shutil.copyfile(path, os.path.join(self._file_manager.models_dir, "model.pt")) logger.debug("Saved model")
def _init_setup(self): self._bg_policy = None if self._background_policy_cls is not None and self._background_policy_params is not None: self._bg_policy = self._background_policy_cls( self._background_policy_params, self._env_spec) assert isinstance(self._bg_policy, Policy) logger.debug("[RosPolicy] Using background policy class: %s" % self._background_policy_cls) self._latest_action = None self._ros_motion_sub = ru.bind_subscriber( RosTopic(self._ros_action_topic, self._ros_action_type), self.action_callback) logger.debug("[RosPolicy] Waiting for action messages to translate.") while self._latest_action is None: pass
def _step(self, action, **kwargs): with timeit("copter_step", reset_on_stop=True): obs, _, done = self._copter.step(action, no_rew_pub=True, **kwargs) pos = obs.obs[0] true_reward = -self._get_cost(pos) reward = self.reward_mask(pos, self.trajectory.get_i()) # TODO self._copter.pub_rew(rx=true_reward, ry=reward) # publish the true reward and masked reward for the sake of accurate data collect # print(pos[:3], self.ac_goal_pos[:3], self.ac_goal_pos_index) # print('reward', reward) # print(action, np.array2string(pos, separator=', '), self._copter._curr_goal_pos, reward, true_reward, done, # self.trajectory.get_i(), self.trajectory.curr_goal) self._copter.set_target(self.trajectory.next(pos), self.fix_waypoint3) done = done or self.trajectory.is_finished() # returns true when the full trajectory has been run through ### online control safety measures # stop if out of frame if pos[0] < 1e-4 and pos[1] < 1e-4: # target is out of frame done = True logger.warn("[TELLO CTRL] Target Left frame! Terminating rollout") # if terminating, send stop signal if self._use_data_capture and not self._copter.offline and done: self._copter.sig_rollout(end=True) goal = self.get_goal() def arr3D_2str(arr, names=("x", "y", "z")): str = "{%s: %.5f, %s: %.5f, %s: %.5f }" \ % (names[0], arr[0], names[1], arr[1], names[2], arr[2]) return str logger.debug("[TPC] T: %.4f sec // OBS: %s -- GOAL: %s -- ACT: %s" % (timeit.elapsed("copter_step"), arr3D_2str(pos), arr3D_2str(goal.goal_obs[0, 0]), arr3D_2str(action.act[0]))) return obs, goal, done
args = parser.parse_args() config_fname = os.path.abspath(args.config) model_fname = os.path.abspath(args.model) assert os.path.exists(config_fname), '{0} does not exist'.format(config_fname) params = import_config(config_fname) params.freeze() file_manager = FileManager(params.exp_name, is_continue=True) if args.model is not None: model_fname = os.path.abspath(args.model) assert os.path.exists(model_fname), 'Model: {0} does not exist'.format( model_fname) logger.debug("Using model: {}".format(model_fname)) elif "checkpoint_model_file" in params.trainer.params: model_fname = os.path.join(file_manager.models_dir, params.trainer.params.checkpoint_model_file) logger.debug( "Using checkpoint model for current eval: {}".format(model_fname)) else: model_fname = os.path.join(file_manager.models_dir, "model.pt") logger.debug( "Using default model for current eval: {}".format(model_fname)) env_spec = params.env_spec.cls(params.env_spec.params) env = params.env.cls(params.env.params, env_spec) model = params.model.cls(params.model.params, env_spec, None) policy = params.policy.cls(params.policy.params, env_spec)
def eval_model(dataset, save_file_name): b_size = dataset.batch_size d_size = len(dataset) pred_trajectories = [] action_sequences = [] true_trajectories = [] costs = [] iters = math.ceil(d_size / b_size) for b in range(iters): logger.debug("[%d/%d]: Eval model" % (b, iters)) idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size)) inputs, outputs, goals = dataset.get_batch(indices=idxs, torch_device=model.device, get_horizon_goals=True, get_action_seq=True) # get obs batch obs = AttrDict() for name in env_spec.observation_names: obs[name] = inputs[name] act_seq = AttrDict() act_seq['act'] = inputs['act_seq'] model.eval() all_obs, all_mouts = rollout(env_spec, model, obs, act_seq, policy._advance_obs_fn) # first unsqueezes and then concats all_obs = AttrDict.leaf_combine_and_apply( all_obs, func=lambda vs: torch.cat(vs, dim=1), map_func=lambda arr: arr.unsqueeze(1)) all_mouts = AttrDict.leaf_combine_and_apply( all_mouts, func=lambda vs: torch.cat(vs, dim=1), map_func=lambda arr: arr.unsqueeze(1)) cost_dict = AttrDict( {'costs': policy._cost_fn(all_obs, goals, act_seq, all_mouts)}) true_trajectories.append(goals.leaf_apply(lambda v: to_numpy(v))) pred_trajectories.append(all_obs.leaf_apply(lambda v: to_numpy(v))) action_sequences.append(act_seq.leaf_apply(lambda v: to_numpy(v))) costs.append(cost_dict.leaf_apply(lambda v: to_numpy(v))) # one big dictionary final_dict = AttrDict.leaf_combine_and_apply( true_trajectories, lambda vs: np.concatenate(vs, axis=0)) combined_pred = AttrDict.leaf_combine_and_apply( pred_trajectories, lambda vs: np.concatenate(vs, axis=0)) combined_acts = AttrDict.leaf_combine_and_apply( action_sequences, lambda vs: np.concatenate(vs, axis=0)) combined_costs = AttrDict.leaf_combine_and_apply( costs, lambda vs: np.concatenate(vs, axis=0)) final_dict.combine(combined_pred) final_dict.combine(combined_acts) # no overlapping keys final_dict.combine(combined_costs) logger.debug("Saving Model Trajectories") logger.debug("Keys: " + str(final_dict.keys())) savemat(save_file_name, final_dict)
def __init__(self, params, env_spec): env_spec.assert_has_names(['obs', 'act', 'prev_obs', 'prev_act']) self._env_spec = env_spec self.x_dim = self._env_spec.names_to_shapes.obs[-1] # dimension of obs self.u_dim = self._env_spec.names_to_shapes.act[-1] # dimension of act self._obs_hist_len = self._env_spec.names_to_shapes.prev_obs[-2] self._act_hist_len = self._env_spec.names_to_shapes.prev_act[-2] # Setup the parameters self._dt = params.dt self._ros_prefix = params.ros_prefix self.offline = params.offline # running offline means don't publish anything (e.g. from rosbag) self.horizon = int(params.horizon) self._normalize = params.normalize if self._normalize: logger.debug("Normalizing input ON") self._coef_cx = 1. / params.img_width self._coef_cy = 1. / params.img_height self._coef_area = 1. / (params.img_width * params.img_height) else: self._coef_cx = self._coef_cy = self._coef_area = 1. self.time_last_step = -1 self._obs = np.zeros((self.x_dim,)) self._next_obs = np.zeros((self.x_dim,)) self._prev_obs = None self._prev_act = None self._ob_lb, self._ob_ub = self._env_spec.limits(['obs']) self._ob_lb, self._ob_ub = self._ob_lb[0], self._ob_ub[0] self.observation_space = spaces.Box(self._ob_lb, self._ob_ub) self._initial_goal_pos = params.initial_goal_pos self._curr_goal_pos = None self._cost = 0 self._done = False # indicates if the copter has completed/failed its rollout self._running = False # indicates if a rollout is currently running # start with an assumption of collision to allow for takeoff self.collided = True self._prev_act = None self._ac_lb, self._ac_ub = self._env_spec.limits(['act']) self._ac_lb, self._ac_ub = self._ac_lb[0], self._ac_ub[0] self.action_space = spaces.Box(self._ac_lb, self._ac_ub) # ros callbacks self._curr_motion = CFMotion() self._ros_topics_and_types = { self._ros_prefix + 'coll': Bool, self._ros_prefix + 'data': CFData, 'extcam/target_vector': Vector3Stamped, } self._ros_msgs = dict() self._ros_msg_times = dict() # ROS setup ru.setup_ros_node('MBRLNode', anonymous=True) for topic, type in self._ros_topics_and_types.items(): ru.bind_subscriber(RosTopic(topic, type), self.ros_msg_update, callback_args=(topic,)) # does not use these publishers if _offline _, self._ros_motion_pub = ru.get_publisher(RosTopic(self._ros_prefix + "motion", CFMotion), queue_size=10) _, self._ros_command_pub = ru.get_publisher(RosTopic(self._ros_prefix + "command", CFCommand), queue_size=10) _, self._ros_goal_pub = ru.get_publisher(RosTopic("/mpc/goal_vector", Vector3Stamped), queue_size=10) _, self._ros_action_pub = ru.get_publisher(RosTopic("/mpc/action_vector", Vector3Stamped), queue_size=10) _, self._ros_reward_pub = ru.get_publisher(RosTopic("/mpc/reward_vector", Vector3Stamped), queue_size=10) _, self._ros_latent_pub = ru.get_publisher(RosTopic("/mpc/latent_vector", Vector3Stamped), queue_size=10) _, self._ros_trajectory_marker_pub = ru.get_publisher(RosTopic("/mpc/trajectory_marker", Marker), queue_size=500) _, self._ros_ac_marker_pub = ru.get_publisher(RosTopic("/mpc/action_marker", Marker), queue_size=10) logger.info("[COPTER]: Initialized, waiting for topic streams before starting...") while not self.ros_is_good(display=False): pass logger.info("[COPTER]: Topics are active.") self._step_rate = rospy.Rate(1. / self._dt) # used in step extra function self._rate = rospy.Rate(1. / self._dt) # used only in background thread # this background publisher thread is only when we are online if not self.offline: ru.setup_background_thread(self._bg_loop_fn, self._rate) ru.start_background_thread() self.reset()
def _step(self, u, no_rew_pub=False, step_extra_func=None, buffer_window_size=-1, **kwargs): self._running = True # signifies that we are publishing actions now if not self.offline: assert self.ros_is_good(display=True) # else: # u = self._latest_offline_ac.copy() obs = self._next_obs.copy() self._enforce_dimension(obs, u) # Limit the control inputs u0 = np.clip(u, self._ac_lb, self._ac_ub) # update vx, vy, dz (does NOT publish) self._set_motion(u0[0], u0[1], u0[2]) self._done = np.all(obs == self._curr_goal_pos) or self.collided if np.any(np.isnan(obs)): logger.debug('[CF]: NAN POSITION') # publishing action messages happens only if we are fully online if not self.offline: if any((self._curr_motion.x, self._curr_motion.y, self._curr_motion.yaw, self._curr_motion.dz)) or not self.collided: self._curr_motion.stamp.stamp = rospy.Time.now() self._ros_motion_pub.publish(self._curr_motion) else: # publish steady hold message when asking for zero motion motion = CFMotion() motion.is_flow_motion = True motion.stamp.stamp = rospy.Time.now() self._ros_motion_pub.publish(motion) else: # offline action publishing act_vec = Vector3Stamped() act_vec.header.stamp = rospy.Time.now() act_vec.vector.x = self._curr_motion.x act_vec.vector.y = self._curr_motion.y act_vec.vector.z = self._curr_motion.dz self._ros_action_pub.publish(act_vec) goal_vec = Vector3Stamped() goal_vec.header.stamp = rospy.Time.now() goal_vec.vector.x = self._curr_goal_pos[0] goal_vec.vector.y = self._curr_goal_pos[1] goal_vec.vector.z = self._curr_goal_pos[2] self._ros_goal_pub.publish(goal_vec) # TODO: set cost properly and in a single place if not no_rew_pub: self.pub_rew(-self._cost) ## EXTRA FUNCTIONALITY for inferring latent online ret = None if step_extra_func is not None: def exit_condition(iters): return self._step_rate.remaining().to_sec() < 1e-7 \ and self._step_rate.sleep() is None ret = step_extra_func(exit_cond=exit_condition, buffer_window_size=buffer_window_size) # returns at or before sleep_end_time mu = ret[0][0].item() sig = ret[1][0].exp().item() # TODO do something with these latent_vec = Vector3Stamped() latent_vec.header.stamp = rospy.Time.now() latent_vec.vector.x = mu latent_vec.vector.y = sig self._ros_latent_pub.publish(latent_vec) else: self._step_rate.sleep() # sleep until next iter # history update self._prev_obs = advance_history_np(self._prev_obs[None], obs[None])[0] self._prev_act = advance_history_np(self._prev_act[None], u0[None])[0] self._next_obs = self._obs.copy() # the exact obs the policy sees return self.get_obs(), self.get_goal(), self._done
def _log(self): logger.info('[Latent Training] Step: {}, Loss: {}, Steps/train (avg): {}' .format(self._current_latent_step, self._current_latent_loss, self._exp_avg_steps_per_train)) mu, lsig = self._model.get_online_latent_mu_logsig() logger.debug('[Latent Training] MU: %s, SIGMA: %s' % (to_numpy(mu).tolist(), to_numpy(lsig.exp()).tolist()))