def _load_np(self):
        local_dict = AttrDict()

        if self._input_file is None:
            for key in self._all_names:
                local_dict[key] = []
            split_indices = np.array([])
        else:
            logger.debug('Loading ' + self._input_file)
            datadict = np.load(self._input_file,
                               mmap_mode="r",
                               allow_pickle=True)

            self._data_len = len(datadict['done'])
            split_indices = np.where(
                datadict['done'])[0] + 1  # one after each episode ends
            # drop the trailing split index so it does not produce an empty final chunk
            if split_indices.size > 0 and split_indices[-1] == self._data_len:
                split_indices = np.delete(split_indices, -1)

            for key in self._all_names:
                assert key in datadict, f'{key} not in np file'
                assert len(datadict[key]) == self._data_len
                # split by episode
                local_dict[key] = np.split(datadict[key], split_indices)

            logger.debug('Dataset length: {}'.format(self._data_len))

        return local_dict, split_indices
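# --- Illustrative sketch (not part of the original snippet) ---
# How the done-flag splitting above behaves on a toy array: np.where(done)[0] + 1
# gives one index just past each episode end, and np.split cuts the flat data there.
import numpy as np

done = np.array([0, 0, 1, 0, 1, 0])           # two finished episodes, one partial
split_indices = np.where(done)[0] + 1         # -> [3, 5]
chunks = np.split(np.arange(len(done)), split_indices)
# chunks: [array([0, 1, 2]), array([3, 4]), array([5])]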
    def __init__(self, params, env_spec, dataset_train):
        super(Model, self).__init__()

        self._env_spec = env_spec
        self._dataset_train = dataset_train

        if "cuda" not in str(params.device) or torch.cuda.is_available():
            self._device = torch.device(params.device)
        else:
            self._device = torch.device("cpu")

        # TODO logger
        logger.debug("Model using device: {}".format(self._device))

        keys = params.leaf_keys()
        # first thing called in model forward (inputs) -> new_inputs
        if 'preproc_fn' in keys:
            self.set_fn("_preproc_fn", params.preproc_fn, Callable[[AttrDict], AttrDict])  # mutation okay

        # updates model outputs (inputs, model_outputs) -> new_model_outputs
        if 'postproc_fn' in keys:
            self.set_fn("_postproc_fn", params.postproc_fn, Callable[[AttrDict, AttrDict], AttrDict])  # mutation okay

        # gets model loss (inputs, outputs, model_outputs) -> loss torch tensor
        if 'loss_fn' in keys:
            self.set_fn("_loss_fn", params.loss_fn, Callable[[AttrDict, AttrDict, AttrDict], torch.Tensor])

        self._init_params_to_attrs(params)
        self._init_setup()
def restore_checkpoint(model, fname):
    logger.debug('Restoring model {0}'.format(fname))
    assert tf.train.checkpoint_exists(fname)
    checkpointer = tf.train.Checkpoint(model=model)
    status = checkpointer.restore(fname)
    if not tf.executing_eagerly():
        status.initialize_or_restore(tf.get_default_session())
 def func(state, goal, i):
     nonlocal mask
     if mask is None:
         mask = np.ones(state.shape)
     abs_diff = np.abs(np.multiply(state, mask) - goal)
     logger.debug("[TEMP] offsets / within: " + str(abs_diff) + " / " +
                  str(abs_diff <= thresh_array))
     return np.all(abs_diff <= thresh_array)
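# --- Illustrative sketch (not part of the original snippet, hypothetical numbers) ---
# The mask zeroes out dimensions that should be ignored when checking whether the
# state is within `thresh_array` of the goal.
import numpy as np

state = np.array([0.1, -0.2, 0.3, 5.0])
goal = np.zeros(4)
mask = np.array([1., 1., 1., 0.])             # ignore the last dimension entirely
thresh_array = np.full(4, 0.5)
print(np.all(np.abs(state * mask - goal) <= thresh_array))  # True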
    def _step(self,
              u,
              no_rew_pub=False,
              step_extra_func=None,
              buffer_window_size=-1,
              **kwargs):

        self._enforce_dimension(self._obs, u)

        # Limit the control inputs
        u0 = np.clip(u, self.ac_lb, self.ac_ub)

        last_state = np.copy(self._obs)

        # update vx, vy, dz (does NOT publish)
        self._set_motion(u0[0], u0[1], u0[2])
        vec = self._sim.step(self._curr_motion)

        self._prev_obs = advance_history_np(self._prev_obs[None],
                                            self._obs[None])[0]
        self._prev_act = advance_history_np(self._prev_act[None], u0[None])[0]

        self._obs[0] = vec.vector.x
        self._obs[1] = vec.vector.y
        self._obs[2] = vec.vector.z

        self.cost = self.get_cost()
        self.done = self.cost <= self.cost_tol

        if np.any(np.isnan(self._obs)):
            logger.debug('CF SANITY: NAN POSITION')

        logger.debug(
            "[SanityEnv]: lat=%d, AC: %s, OB: %s, NEXT_OB: %s, GOAL: %s, REW: %f"
            % (self._latent_idx, np.array2string(u0, separator=', '),
               np.array2string(last_state, separator=', '),
               np.array2string(self._obs, separator=', '),
               np.array2string(self.ac_goal_pos, separator=', '), -self.cost))

        ## EXTRA FUNCTIONALITY for inferring latent online
        ret = None
        if step_extra_func is not None:

            def exit_condition(iters):
                return self._step_rate.remaining().to_sec() < 1e-7 \
                       and self._step_rate.sleep() is None

            ret = step_extra_func(exit_cond=exit_condition,
                                  buffer_window_size=buffer_window_size
                                  )  # returns at or before sleep_end_time
            mu = ret[0][0]
            logsig = ret[1][0]  # TODO do something with these
        else:
            self._step_rate.sleep()  # sleep until next iter

        return self.get_obs(), self.get_goal(), self.done
    def _load_mat(self):
        local_dict = AttrDict()

        if self._input_file is None:
            local_dict = self._env_spec.get_zeros(self._all_names, 0)  # np
        else:
            logger.debug('Loading ' + self._input_file)
            samples = scipy.io.loadmat(self._input_file)

            # split into chunks by episode. dict = {key: list of [Ni, key_shape]}
            data_dict = split_data_by_episodes(samples,
                                               horizon=self._planning_horizon,
                                               n_obs=self._obs_history_length,
                                               n_acs=self._acs_history_length)

            self._mu_obs = data_dict['mu_obs']
            self._sigma_obs = data_dict['sigma_obs']
            self._mu_delta_obs = data_dict['mu_delta_obs']
            self._sigma_delta_obs = data_dict['sigma_delta_obs']

            self._action_sequences = np.concatenate(
                data_dict['act_seq'],
                axis=0).astype(self._env_spec.names_to_dtypes['act'])

            split_indices = np.cumsum(data_dict['episode_sizes'])
            # remove the last chunk since it will be empty
            split_indices = np.delete(split_indices, -1)
            if self._split_indices.size > 0:
                self._split_indices = np.concatenate([
                    self._split_indices,
                    np.array([self._data_len]), self._data_len + split_indices
                ],
                                                     axis=0)
            else:
                self._split_indices = split_indices

            self._num_episodes += len(data_dict['done'])
            self._data_len += np.sum(data_dict['episode_sizes'])
            logger.debug('Dataset length: {}'.format(self._data_len))

            for key in self._all_names:
                assert key in data_dict, f'{key} not in converted mat file'
                assert len(data_dict[key]) > 0
                # turn list into np array with the correct type
                local_dict[key] = np.concatenate(
                    data_dict[key],
                    axis=0).astype(self._env_spec.names_to_dtypes[key])
                assert local_dict[key].shape[1:] == self._env_spec.names_to_shapes[key], \
                    "Bad Data shape for {}: {}, requires {}" \
                        .format(key, local_dict[key].shape[1:], self._env_spec.names_to_shapes[key])
                # print(key, self._env_spec.names_to_shapes[key], local_dict[key].shape)
                assert local_dict[key].shape[0] == self._data_len, \
                    "Bad datalen for {}: {}, requires {}".format(key, local_dict[key].shape, self._data_len)

        return local_dict
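# --- Illustrative sketch (not part of the original snippet, hypothetical sizes) ---
# Episode sizes are turned into split indices via cumsum; the last index equals the
# total length and would produce an empty final chunk, so it is dropped.
import numpy as np

episode_sizes = [3, 2, 4]
split_indices = np.cumsum(episode_sizes)       # [3, 5, 9]
split_indices = np.delete(split_indices, -1)   # [3, 5]
# np.split(flat_data, [3, 5]) then yields chunks of length 3, 2 and 4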
    def _log(self):
        logger.info('')
        logger.info('Step {0}'.format(self._get_current_step() - 1))
        with tf.contrib.summary.always_record_summaries():
            for key, value in sorted(self._tb_logger.items(),
                                     key=lambda kv: kv[0]):
                logger.info('{0} {1:.6f}'.format(key, np.mean(value)))
                tf.contrib.summary.scalar(key, np.mean(value))
            self._tb_logger.clear()

        for line in str(timeit).split('\n'):
            logger.debug(line)
        timeit.reset()
 def _restore_checkpoint(self):
     # TODO
     path = os.path.join(self._file_manager.models_dir, self._checkpoint_model_file)
     if os.path.isfile(path):
         checkpoint = torch.load(str(path))
         self._model.restore_from_checkpoint(checkpoint)
         self._current_step = checkpoint['step']
         self._current_train_step = checkpoint['train_step']
         self._current_holdout_step = checkpoint['holdout_step']
         self._current_latent_step = checkpoint['latent_step']
         self._current_train_loss = checkpoint['train_loss']
         self._current_holdout_loss = checkpoint['holdout_loss']
         self._current_latent_loss = checkpoint['latent_loss']
         logger.debug("Loaded model, current train step: {}".format(self._current_step))
    def reset(self, zero_at=None, new_iterator=None):
        if zero_at is not None:
            self.origin = zero_at

        if new_iterator is not None:
            self.iterator = new_iterator
        else:
            _, self.original, self.iterator = tee(self.original, 3)

        self.curr_goal = None
        self.completed = False
        self.i = 0

        logger.debug("RESET AT: " + str(self.origin[:2]))
def eval_policy(dataset, save_file_name):
    b_size = dataset.batch_size
    d_size = len(dataset)

    obs_all = []
    goals_all = []
    output_actions = []
    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval policy" % (b, iters))
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))
        if args.random_goals:
            inputs, outputs = dataset.get_batch(indices=idxs,
                                                torch_device=model.device,
                                                get_horizon_goals=False)
            # this is to account for broadcasting to H+1 goals
            goals = env_spec.get_uniform(
                env_spec.goal_names, b_size,
                torch_device=model.device).unsqueeze(1)
        else:
            inputs, outputs, goals = dataset.get_batch(
                indices=idxs,
                torch_device=model.device,
                get_horizon_goals=True)

        # get obs batch
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act = policy.get_action(model, obs, goals, batch=True)

        goals_all.append(goals.leaf_apply(lambda v: to_numpy(v)))
        obs_all.append(obs.leaf_apply(lambda v: to_numpy(v)))
        output_actions.append(act.leaf_apply(lambda v: to_numpy(v)))

    # one big dictionary
    combined_obs = AttrDict.leaf_combine_and_apply(
        obs_all, lambda vs: np.concatenate(vs, axis=0))
    combined_goals = AttrDict.leaf_combine_and_apply(
        goals_all, lambda vs: np.concatenate(vs, axis=0))
    combined_output_actions = AttrDict.leaf_combine_and_apply(
        output_actions, lambda vs: np.concatenate(vs, axis=0))

    combined_obs.combine(combined_goals)
    combined_obs.combine(combined_output_actions)

    logger.debug("Saving Action Sequences")
    savemat(save_file_name, combined_obs)
 def _save(self):
     base_fname = "model.pt"
     if self._save_checkpoints:
         base_fname = "chkpt_{:07d}.pt".format(self._current_step)
     path = os.path.join(self._file_manager.models_dir, base_fname)
     torch.save({'step': self._current_step,
                 'train_step': self._current_train_step,
                 'holdout_step': self._current_holdout_step,
                 'latent_step': self._current_latent_step,
                 'train_loss': self._current_train_loss,
                 'holdout_loss': self._current_holdout_loss,
                 'latent_loss': self._current_latent_loss,
                 'model': self._model.state_dict()}, path)
     if self._save_checkpoints:
         shutil.copyfile(path, os.path.join(self._file_manager.models_dir, "model.pt"))
     logger.debug("Saved model")
    def _init_setup(self):
        self._bg_policy = None
        if self._background_policy_cls is not None and self._background_policy_params is not None:
            self._bg_policy = self._background_policy_cls(
                self._background_policy_params, self._env_spec)
            assert isinstance(self._bg_policy, Policy)
            logger.debug("[RosPolicy] Using background policy class: %s" %
                         self._background_policy_cls)

        self._latest_action = None
        self._ros_motion_sub = ru.bind_subscriber(
            RosTopic(self._ros_action_topic, self._ros_action_type),
            self.action_callback)

        logger.debug("[RosPolicy] Waiting for action messages to translate.")
        while self._latest_action is None:
            pass
    def _step(self, action, **kwargs):

        with timeit("copter_step", reset_on_stop=True):
            obs, _, done = self._copter.step(action, no_rew_pub=True, **kwargs)

        pos = obs.obs[0]
        true_reward = -self._get_cost(pos)
        reward = self.reward_mask(pos, self.trajectory.get_i())

        # TODO
        self._copter.pub_rew(rx=true_reward,
                             ry=reward)  # publish both the true and masked reward so logged data stays accurate

        # print(pos[:3], self.ac_goal_pos[:3], self.ac_goal_pos_index)
        # print('reward', reward)
        # print(action, np.array2string(pos, separator=', '), self._copter._curr_goal_pos, reward, true_reward, done,
        #       self.trajectory.get_i(), self.trajectory.curr_goal)

        self._copter.set_target(self.trajectory.next(pos), self.fix_waypoint3)

        done = done or self.trajectory.is_finished()  # returns true when the full trajectory has been run through

        ### online control safety measures

        # stop if out of frame
        if pos[0] < 1e-4 and pos[1] < 1e-4:
            # target is out of frame
            done = True
            logger.warn("[TELLO CTRL] Target Left frame! Terminating rollout")

        # if terminating, send stop signal
        if self._use_data_capture and not self._copter.offline and done:
            self._copter.sig_rollout(end=True)

        goal = self.get_goal()

        def arr3D_2str(arr, names=("x", "y", "z")):
            str = "{%s: %.5f, %s: %.5f, %s: %.5f }" \
                  % (names[0], arr[0], names[1], arr[1], names[2], arr[2])
            return str

        logger.debug("[TPC] T: %.4f sec // OBS: %s -- GOAL: %s -- ACT: %s" %
                     (timeit.elapsed("copter_step"), arr3D_2str(pos), arr3D_2str(goal.goal_obs[0, 0]), arr3D_2str(action.act[0])))

        return obs, goal, done
args = parser.parse_args()

config_fname = os.path.abspath(args.config)
model_fname = os.path.abspath(args.model) if args.model is not None else None  # resolved again below
assert os.path.exists(config_fname), '{0} does not exist'.format(config_fname)

params = import_config(config_fname)
params.freeze()

file_manager = FileManager(params.exp_name, is_continue=True)

if args.model is not None:
    model_fname = os.path.abspath(args.model)
    assert os.path.exists(model_fname), 'Model: {0} does not exist'.format(
        model_fname)
    logger.debug("Using model: {}".format(model_fname))
elif "checkpoint_model_file" in params.trainer.params:
    model_fname = os.path.join(file_manager.models_dir,
                               params.trainer.params.checkpoint_model_file)
    logger.debug(
        "Using checkpoint model for current eval: {}".format(model_fname))
else:
    model_fname = os.path.join(file_manager.models_dir, "model.pt")
    logger.debug(
        "Using default model for current eval: {}".format(model_fname))

env_spec = params.env_spec.cls(params.env_spec.params)
env = params.env.cls(params.env.params, env_spec)
model = params.model.cls(params.model.params, env_spec, None)
policy = params.policy.cls(params.policy.params, env_spec)
def eval_model(dataset, save_file_name):
    b_size = dataset.batch_size
    d_size = len(dataset)

    pred_trajectories = []
    action_sequences = []
    true_trajectories = []
    costs = []

    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval model" % (b, iters))
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))
        inputs, outputs, goals = dataset.get_batch(indices=idxs,
                                                   torch_device=model.device,
                                                   get_horizon_goals=True,
                                                   get_action_seq=True)

        # get obs batch
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act_seq = AttrDict()
        act_seq['act'] = inputs['act_seq']

        model.eval()
        all_obs, all_mouts = rollout(env_spec, model, obs, act_seq,
                                     policy._advance_obs_fn)

        # first unsqueezes and then concats
        all_obs = AttrDict.leaf_combine_and_apply(
            all_obs,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))
        all_mouts = AttrDict.leaf_combine_and_apply(
            all_mouts,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))

        cost_dict = AttrDict(
            {'costs': policy._cost_fn(all_obs, goals, act_seq, all_mouts)})

        true_trajectories.append(goals.leaf_apply(lambda v: to_numpy(v)))
        pred_trajectories.append(all_obs.leaf_apply(lambda v: to_numpy(v)))
        action_sequences.append(act_seq.leaf_apply(lambda v: to_numpy(v)))
        costs.append(cost_dict.leaf_apply(lambda v: to_numpy(v)))

    # one big dictionary
    final_dict = AttrDict.leaf_combine_and_apply(
        true_trajectories, lambda vs: np.concatenate(vs, axis=0))
    combined_pred = AttrDict.leaf_combine_and_apply(
        pred_trajectories, lambda vs: np.concatenate(vs, axis=0))
    combined_acts = AttrDict.leaf_combine_and_apply(
        action_sequences, lambda vs: np.concatenate(vs, axis=0))
    combined_costs = AttrDict.leaf_combine_and_apply(
        costs, lambda vs: np.concatenate(vs, axis=0))

    final_dict.combine(combined_pred)
    final_dict.combine(combined_acts)  # no overlapping keys
    final_dict.combine(combined_costs)

    logger.debug("Saving Model Trajectories")
    logger.debug("Keys: " + str(final_dict.keys()))
    savemat(save_file_name, final_dict)
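# --- Illustrative sketch (not part of the original snippet) ---
# The leaf_combine_and_apply pattern above stacks per-step tensors of shape [B, ...]
# into a rollout of shape [B, H, ...]: unsqueeze a time axis, then concatenate along it.
import torch

steps = [torch.zeros(4, 3) for _ in range(10)]              # 10 steps of a batch of 4
stacked = torch.cat([s.unsqueeze(1) for s in steps], dim=1)
print(stacked.shape)                                         # torch.Size([4, 10, 3])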
    def __init__(self, params, env_spec):
        env_spec.assert_has_names(['obs', 'act', 'prev_obs', 'prev_act'])

        self._env_spec = env_spec

        self.x_dim = self._env_spec.names_to_shapes.obs[-1]  # dimension of obs
        self.u_dim = self._env_spec.names_to_shapes.act[-1]  # dimension of act

        self._obs_hist_len = self._env_spec.names_to_shapes.prev_obs[-2]
        self._act_hist_len = self._env_spec.names_to_shapes.prev_act[-2]

        # Setup the parameters
        self._dt = params.dt
        self._ros_prefix = params.ros_prefix

        self.offline = params.offline  # running offline means don't publish anything (e.g. from rosbag)
        self.horizon = int(params.horizon)

        self._normalize = params.normalize

        if self._normalize:
            logger.debug("Normalizing input ON")
            self._coef_cx = 1. / params.img_width
            self._coef_cy = 1. / params.img_height
            self._coef_area = 1. / (params.img_width * params.img_height)
        else:
            self._coef_cx = self._coef_cy = self._coef_area = 1.

        self.time_last_step = -1

        self._obs = np.zeros((self.x_dim,))
        self._next_obs = np.zeros((self.x_dim,))
        self._prev_obs = None
        self._prev_act = None
        self._ob_lb, self._ob_ub = self._env_spec.limits(['obs'])
        self._ob_lb, self._ob_ub = self._ob_lb[0], self._ob_ub[0]
        self.observation_space = spaces.Box(self._ob_lb, self._ob_ub)

        self._initial_goal_pos = params.initial_goal_pos
        self._curr_goal_pos = None

        self._cost = 0
        self._done = False  # indicates if the copter has completed/failed its rollout
        self._running = False  # indicates if a rollout is currently running

        # start with an assumption of collision to allow for takeoff
        self.collided = True

        self._prev_act = None
        self._ac_lb, self._ac_ub = self._env_spec.limits(['act'])
        self._ac_lb, self._ac_ub = self._ac_lb[0], self._ac_ub[0]
        self.action_space = spaces.Box(self._ac_lb, self._ac_ub)

        # ros callbacks
        self._curr_motion = CFMotion()

        self._ros_topics_and_types = {
            self._ros_prefix + 'coll': Bool,
            self._ros_prefix + 'data': CFData,
            'extcam/target_vector': Vector3Stamped,
        }

        self._ros_msgs = dict()
        self._ros_msg_times = dict()

        # ROS setup
        ru.setup_ros_node('MBRLNode', anonymous=True)
        for topic, msg_type in self._ros_topics_and_types.items():  # avoid shadowing the builtin `type`
            ru.bind_subscriber(RosTopic(topic, msg_type), self.ros_msg_update, callback_args=(topic,))

        # does not use these publishers if _offline
        _, self._ros_motion_pub = ru.get_publisher(RosTopic(self._ros_prefix + "motion", CFMotion), queue_size=10)
        _, self._ros_command_pub = ru.get_publisher(RosTopic(self._ros_prefix + "command", CFCommand), queue_size=10)
        _, self._ros_goal_pub = ru.get_publisher(RosTopic("/mpc/goal_vector", Vector3Stamped), queue_size=10)
        _, self._ros_action_pub = ru.get_publisher(RosTopic("/mpc/action_vector", Vector3Stamped), queue_size=10)
        _, self._ros_reward_pub = ru.get_publisher(RosTopic("/mpc/reward_vector", Vector3Stamped), queue_size=10)
        _, self._ros_latent_pub = ru.get_publisher(RosTopic("/mpc/latent_vector", Vector3Stamped), queue_size=10)

        _, self._ros_trajectory_marker_pub = ru.get_publisher(RosTopic("/mpc/trajectory_marker", Marker), queue_size=500)
        _, self._ros_ac_marker_pub = ru.get_publisher(RosTopic("/mpc/action_marker", Marker), queue_size=10)

        logger.info("[COPTER]: Initialized, waiting for topic streams before starting...")
        while not self.ros_is_good(display=False):
            pass
        logger.info("[COPTER]: Topics are active.")

        self._step_rate = rospy.Rate(1. / self._dt)  # used in step extra function
        self._rate = rospy.Rate(1. / self._dt)  # used only in background thread

        # this background publisher thread is only when we are online
        if not self.offline:
            ru.setup_background_thread(self._bg_loop_fn, self._rate)
            ru.start_background_thread()

        self.reset()
    def _step(self, u, no_rew_pub=False, step_extra_func=None, buffer_window_size=-1, **kwargs):
        self._running = True  # signifies that we are publishing actions now

        if not self.offline:
            assert self.ros_is_good(display=True)
        # else:
        #     u = self._latest_offline_ac.copy()

        obs = self._next_obs.copy()

        self._enforce_dimension(obs, u)

        # Limit the control inputs
        u0 = np.clip(u, self._ac_lb, self._ac_ub)

        # update vx, vy, dz (does NOT publish)
        self._set_motion(u0[0], u0[1], u0[2])

        self._done = np.all(obs == self._curr_goal_pos) or self.collided

        if np.any(np.isnan(obs)):
            logger.debug('[CF]: NAN POSITION')

        # publishing action messages happens only if we are fully online
        if not self.offline:
            if any((self._curr_motion.x, self._curr_motion.y, self._curr_motion.yaw,
                    self._curr_motion.dz)) or not self.collided:
                self._curr_motion.stamp.stamp = rospy.Time.now()
                self._ros_motion_pub.publish(self._curr_motion)
            else:
                # publish steady hold message when asking for zero motion
                motion = CFMotion()
                motion.is_flow_motion = True
                motion.stamp.stamp = rospy.Time.now()
                self._ros_motion_pub.publish(motion)
        else:
            # offline action publishing
            act_vec = Vector3Stamped()
            act_vec.header.stamp = rospy.Time.now()
            act_vec.vector.x = self._curr_motion.x
            act_vec.vector.y = self._curr_motion.y
            act_vec.vector.z = self._curr_motion.dz
            self._ros_action_pub.publish(act_vec)

        goal_vec = Vector3Stamped()
        goal_vec.header.stamp = rospy.Time.now()
        goal_vec.vector.x = self._curr_goal_pos[0]
        goal_vec.vector.y = self._curr_goal_pos[1]
        goal_vec.vector.z = self._curr_goal_pos[2]
        self._ros_goal_pub.publish(goal_vec)

        # TODO: set cost properly and in a single place
        if not no_rew_pub:
            self.pub_rew(-self._cost)

        ## EXTRA FUNCTIONALITY for inferring latent online
        ret = None
        if step_extra_func is not None:
            def exit_condition(iters):
                return self._step_rate.remaining().to_sec() < 1e-7 \
                       and self._step_rate.sleep() is None
            ret = step_extra_func(exit_cond=exit_condition,
                                  buffer_window_size=buffer_window_size)  # returns at or before sleep_end_time
            mu = ret[0][0].item()
            sig = ret[1][0].exp().item()  # TODO do something with these
            latent_vec = Vector3Stamped()
            latent_vec.header.stamp = rospy.Time.now()
            latent_vec.vector.x = mu
            latent_vec.vector.y = sig
            self._ros_latent_pub.publish(latent_vec)
        else:
            self._step_rate.sleep()  # sleep until next iter

        # history update
        self._prev_obs = advance_history_np(self._prev_obs[None], obs[None])[0]
        self._prev_act = advance_history_np(self._prev_act[None], u0[None])[0]

        self._next_obs = self._obs.copy()  # the exact obs the policy sees
        return self.get_obs(), self.get_goal(), self._done
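# --- Illustrative sketch (not part of the original snippet) ---
# One way a step_extra_func compatible with the exit_cond protocol above might look:
# run latent-inference iterations until the step period is nearly used up (the exit
# condition also performs the final sleep), then return (mu, logsig) for publishing.
# The return structure is an assumption inferred from how `ret` is unpacked above.
import torch

def example_step_extra_func(exit_cond, buffer_window_size=-1):
    iters = 0
    while not exit_cond(iters):
        iters += 1                             # placeholder for one inference update
    mu, logsig = torch.zeros(1), torch.zeros(1)
    return mu, logsig                          # ret[0][0].item(), ret[1][0].exp() work on these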
 def _log(self):
     logger.info('[Latent Training] Step: {}, Loss: {}, Steps/train (avg): {}'
                 .format(self._current_latent_step, self._current_latent_loss, self._exp_avg_steps_per_train))
     mu, lsig = self._model.get_online_latent_mu_logsig()
     logger.debug('[Latent Training] MU: %s, SIGMA: %s' % (to_numpy(mu).tolist(), to_numpy(lsig.exp()).tolist()))