def get_batch(self, indices=None, torch_device=None):
        # TODO: support caller-provided indices
        assert indices is None, "sampling custom indices is not supported yet"
        num_eps = len(self._datadict.done)  # number of episodes
        indices = np.random.choice(num_eps, self._batch_size, replace=False)

        sampled_datadict = self._datadict.leaf_apply(
            lambda list_of_arr: np.stack(
                [self.hor_chunk(list_of_arr[i]) for i in indices]))

        inputs = AttrDict()
        outputs = AttrDict()
        for key in self._env_spec.observation_names:
            inputs[key] = sampled_datadict[key]
        for key in self._env_spec.action_names:
            inputs[key] = sampled_datadict[key]

        for key in self._env_spec.output_observation_names:
            outputs[key] = sampled_datadict[key]

        outputs.done = sampled_datadict.done.astype(bool)

        if torch_device is not None:
            for d in (inputs, outputs):
                d.leaf_modify(lambda x: torch.from_numpy(x).to(torch_device))

        return inputs, outputs  # shape is (batch, horizon, name_dim...)
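
# A minimal numpy sketch of the chunk-and-stack pattern above, assuming each
# episode is a (T_i, dim) array and hor_chunk slices a fixed horizon H out of
# it (hor_chunk's exact behavior is an assumption; it is not shown here):
import numpy as np

H = 4
episodes = [np.random.randn(10, 3), np.random.randn(7, 3)]  # variable-length episodes

def hor_chunk(ep, H=H):
    start = np.random.randint(0, len(ep) - H + 1)  # random window start
    return ep[start:start + H]

batch = np.stack([hor_chunk(ep) for ep in episodes])
print(batch.shape)  # (2, 4, 3) -> (batch, horizon, dim)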
Example #2
    def eval_act_sequence(self, model, action_seq, observations, goals):
        """ Finds predicted trajectory for a given batch of ac_sequences on given initial obs and prev_obs vectors
        Arguments:
            model: the underlying dynamics model
            observations: dotmap:(N x), initial observations (state, state hist, act hist, latent hist)
            action_seq: (N x H x dotmap{}), action sequences per initial observation
            goals: should be shape (N, H+1, dO) or broadcastable
        Returns: dictionary{ctrl_seq, traj_seq, cost, }
        """

        # TODO implement multiple particles

        # run the model forward on obs_start
        all_obs, all_mouts = rollout(self._env_spec, model, observations,
                                     action_seq, self._advance_obs_fn)

        # unsqueeze a time dim on each per-step output, then concat along it
        all_obs = AttrDict.leaf_combine_and_apply(
            all_obs,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))
        all_mouts = AttrDict.leaf_combine_and_apply(
            all_mouts,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))
        costs = self._cost_fn(all_obs, goals, action_seq, all_mouts)

        return AttrDict(
            trajectory=all_obs,
            costs=costs  # (N,)
        )
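
# Standalone illustration of the "unsqueeze then cat" pattern above: a rollout
# yields one (N, dim) tensor per timestep, and concatenating the unsqueezed
# tensors along dim=1 produces an (N, H, dim) trajectory.
import torch

per_step = [torch.randn(8, 3) for _ in range(5)]  # H=5 steps, each (N=8, dim=3)
traj = torch.cat([t.unsqueeze(1) for t in per_step], dim=1)
print(traj.shape)  # torch.Size([8, 5, 3])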
    def get_action(self, model, observation, goals, batch=True):
        """
        Args:
            model (Model):
            observation (AttrDict):
            goals (AttrDict):
            batch (bool):

        Returns:
            AttrDict
        """
        if self._bg_policy is not None:
            action_dict = self._bg_policy.get_action(model,
                                                     observation,
                                                     goals,
                                                     batch=batch)
        else:
            action_dict = AttrDict()

        if batch:
            act = np.tile(self._latest_action[None],
                          (observation.obs.shape[0], 1))
        else:
            act = self._latest_action

        # save a copy of the background policy's action if it's there
        if "act" in action_dict.keys():
            action_dict.bg_act = action_dict.act

        action_dict.act = to_torch(act)

        return action_dict
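
# Minimal sketch of the tiling above: a single cached action of shape
# (act_dim,) is copied across the batch so every observation gets the same
# latest action.
import numpy as np

latest_action = np.array([0.1, -0.2])
act = np.tile(latest_action[None], (16, 1))  # (batch, act_dim)
print(act.shape)  # (16, 2)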
 def get_batch(self):
     """
     Returns:
         inputs (AttrDict)
         outputs (AttrDict)
     """
     raise NotImplementedError
    def _reset_curr_episode(self):
        self._curr_episode_obs = AttrDict()
        self._curr_episode_actions = AttrDict()
        self._curr_episode_goals = AttrDict()
        for name in self._model.env_spec.observation_names:
            self._curr_episode_obs[name] = []
        for name in self._model.env_spec.action_names:
            self._curr_episode_actions[name] = []
        for name in self._model.env_spec.goal_names:
            self._curr_episode_goals[name] = []

        self._curr_episode_dones = []
    def get_batch(self,
                  indices=None,
                  torch_device=None,
                  get_horizon_goals=False,
                  get_action_seq=False,
                  min_idx=0):
        # TODO fix this
        if indices is None:
            assert 0 <= min_idx < self._data_len
            batch = min(self._data_len - min_idx, self._batch_size)
            indices = np.random.choice(self._data_len - min_idx,
                                       batch,
                                       replace=False)
            indices += min_idx  # base index to consider in dataset

        # get current batch
        sampled_datadict = self._datadict.leaf_apply(lambda arr: arr[indices])

        inputs = AttrDict()
        outputs = AttrDict()
        goals = AttrDict()
        for key in self._env_spec.observation_names:
            inputs[key] = sampled_datadict[key]
        for key in self._env_spec.action_names:
            inputs[key] = sampled_datadict[key]

        for key in self._env_spec.output_observation_names:
            outputs[key] = sampled_datadict[key]

        outputs.done = sampled_datadict.done

        if get_action_seq:
            inputs['act_seq'] = self._action_sequences[indices]

        if get_horizon_goals:
            for key in self._env_spec.goal_names:
                goals[key] = sampled_datadict[key]

        if torch_device is not None:
            for d in (inputs, outputs, goals):
                d.leaf_modify(lambda x: torch.from_numpy(x).to(torch_device))

        if get_horizon_goals:
            return inputs, outputs, goals
        return inputs, outputs  # shape is (batch, horizon, name_dim...)
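
# Standalone sketch of the offset sampling above: indices are drawn uniformly
# without replacement from [min_idx, data_len), shrinking the batch when fewer
# samples remain.
import numpy as np

data_len, batch_size, min_idx = 100, 8, 95
batch = min(data_len - min_idx, batch_size)  # only 5 samples remain here
indices = np.random.choice(data_len - min_idx, batch, replace=False) + min_idx
assert (indices >= min_idx).all() and (indices < data_len).all()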
    def _load_np(self):
        local_dict = AttrDict()

        if self._input_file is None:
            for key in self._all_names:
                local_dict[key] = []
            split_indices = np.array([])
        else:
            logger.debug('Loading ' + self._input_file)
            datadict = np.load(self._input_file,
                               mmap_mode="r",
                               allow_pickle=True)

            self._data_len = len(datadict['done'])
            split_indices = np.where(
                datadict['done'])[0] + 1  # one after each episode ends
            # remove the last chunk since it will be empty
            if split_indices.size > 0 and split_indices[-1] == self._data_len:
                split_indices = np.delete(split_indices, -1)

            for key in self._all_names:
                assert key in datadict, f'{key} not in np file'
                assert len(datadict[key]) == self._data_len
                # split by episode
                local_dict[key] = np.split(datadict[key], split_indices)

            logger.debug('Dataset length: {}'.format(self._data_len))

        return local_dict, split_indices
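
# Standalone sketch of the episode split above: `done` marks episode ends, so
# np.split at (done index + 1) cuts a flat buffer into per-episode chunks; the
# trailing boundary is dropped to avoid an empty final chunk.
import numpy as np

done = np.array([0, 0, 1, 0, 1, 0, 0, 1], dtype=bool)
data = np.arange(len(done))
split_indices = np.where(done)[0] + 1  # [3, 5, 8]
if split_indices.size > 0 and split_indices[-1] == len(done):
    split_indices = np.delete(split_indices, -1)
episodes = np.split(data, split_indices)
print([ep.tolist() for ep in episodes])  # [[0, 1, 2], [3, 4], [5, 6, 7]]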
    def forward(self, inputs, obs_lowd=None):
        """
        Given inputs, map them to the appropriate latent distribution

        :param inputs (AttrDict): holds obs, prev_obs, prev_act, latent and "act"
        :param training (bool):
        :return: AttrDict: parametrizes distribution of latents, holds mu, log_sigma
        """

        assert hasattr(inputs, 'latent')
        assert inputs.latent.dtype in [torch.short, torch.int, torch.long], \
            "Latent is type: " + str(inputs.latent.type())
        # latent arrives as (batch, 1); flatten it to (batch,)
        orig = inputs.latent.view(inputs.latent.shape[0])
        # map latent classes to mu, log_sig
        mus = []
        log_sigs = []
        for latent_class in orig:
            # -1 class specifies online inference
            if latent_class.item() == -1:
                mus.append(self.online_mu)
                log_sigs.append(self.online_log_sigma)
            else:
                mus.append(self.__getattr__("mu_%d" % latent_class.item()))
                log_sigs.append(
                    self.__getattr__("log_sigma_%d" % latent_class.item()))

        mu = torch.stack(mus)
        log_sigma = torch.stack(log_sigs)

        # equivalent: D.normal.Normal(loc=mu, scale=log_sigma.exp()).rsample()
        # reparameterization trick: the sample stays differentiable w.r.t. mu and log_sigma
        sample = mu + torch.randn_like(mu) * log_sigma.exp()

        return AttrDict({'mu': mu, 'log_sigma': log_sigma, 'sample': sample})
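
# Standalone sketch of the reparameterization step above: sampling
# mu + eps * sigma with eps ~ N(0, I) keeps the sample differentiable with
# respect to both mu and log_sigma.
import torch

mu = torch.zeros(4, 2, requires_grad=True)
log_sigma = torch.zeros(4, 2, requires_grad=True)
sample = mu + torch.randn_like(mu) * log_sigma.exp()
sample.sum().backward()
print(mu.grad is not None, log_sigma.grad is not None)  # True True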
Example #9
 def resample_and_flatten(vs):
     old_acseq = vs[0]
     mean, std = vs[1], vs[2]
     sample = torch.randn_like(old_acseq) * std + mean
     d = AttrDict(act=sample)
     self._env_spec.clip(d, ['act'])
     return d.act.view([-1] + list(old_acseq.shape[2:]))
 def get_output_stats(self):
     return AttrDict({
         'mu': self._mu_obs.copy(),
         'mu_delta': self._mu_delta_obs.copy(),
         'sigma': self._sigma_obs.copy(),
         'sigma_delta': self._sigma_delta_obs.copy(),
     })
 def _env_step(self, env, dataset, obs, goal):
     # TODO implement online training
     action = self._policy.get_action(self._model, obs, goal)
     next_obs, next_goal, done = env.step(action)
     self._curr_episode_obs = AttrDict.leaf_combine_and_apply(
         [self._curr_episode_obs, next_obs], lambda vs: vs[0] + [vs[1]])
     self._curr_episode_actions = AttrDict.leaf_combine_and_apply(
         [self._curr_episode_actions, action], lambda vs: vs[0] + [vs[1]])
     self._curr_episode_goals = AttrDict.leaf_combine_and_apply(
         [self._curr_episode_goals, next_goal], lambda vs: vs[0] + [vs[1]])
     self._curr_episode_dones.append(done)
     if done:
         dataset.add_episode(self._curr_episode_obs, self._curr_episode_goals, self._curr_episode_actions,
                             self._curr_episode_dones)
         self._reset_curr_episode()
         next_obs, next_goal = env.reset()
     return next_obs, next_goal
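
# Hypothetical driver loop for _env_step (env, dataset, and trainer
# construction are assumptions; only the call pattern comes from the snippet
# above):
#
#   obs, goal = env.reset()
#   for _ in range(num_steps):
#       obs, goal = trainer._env_step(env, dataset, obs, goal)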
 def get_goal(self):
     if self._use_future_goals and self._copter.horizon > 0:
         goal = self._copter.get_goal().goal_obs[0, 0]
         next_n = self.trajectory.try_next_n(self._copter.horizon)\
             .reshape(self._copter.horizon, self.x_dim)
         future_goals = np.concatenate([goal[None], next_n], axis=0)
         return self._env_spec.map_to_types(AttrDict(goal_obs=future_goals[None]))
     else:
         return self._copter.get_goal()
Example #13
    def get_action(self, model, observation, goal, batch=False):
        """Optimizes the cost function provided in setup().
        Arguments:
            model: must be callable(action_sequence, observation, goal) and return cost (torch array)
                    where action is at AttrDict consisting of keys only in self.action_names
            observation: {}
            goal: {goal_obs} where goal_obs must be N x H+1 x ...
            batch:

        Returns:
            AttrDict with {action_sequence, results {costs, order} }
        """
        # generate random sequence
        batch_size = goal.goal_obs.shape[0]  # requires goal_obs to be a key
        device = goal.goal_obs.device

        if not batch:
            observation = observation.leaf_apply(lambda arr: arr.unsqueeze(
                0).repeat_interleave(self._pop_size, dim=0))
            goal = goal.leaf_apply(lambda arr: arr.unsqueeze(0).
                                   repeat_interleave(self._pop_size, dim=0))
        else:
            observation = observation.leaf_apply(
                lambda arr: arr.repeat_interleave(self._pop_size, dim=0))
            goal = goal.leaf_apply(
                lambda arr: arr.repeat_interleave(self._pop_size, dim=0))

        action_sequence = self._env_spec.get_uniform(
            self._action_names,
            batch_size=batch_size * self._pop_size * self._horizon)
        action_sequence.leaf_modify(lambda x: split_dim(
            torch.from_numpy(x).to(device),
            dim=0,
            new_shape=[batch_size * self._pop_size, self._horizon]))

        # run the model
        results = model(action_sequence, observation, goal)

        # view as (B, Pop, ...)
        action_sequence.leaf_modify(
            lambda x: split_dim(x, 0, [batch_size, self._pop_size]))
        results.leaf_modify(
            lambda x: split_dim(x, 0, [batch_size, self._pop_size]))

        results['order'] = torch.argsort(
            results.costs, dim=1)  # lowest to highest (best to worst)
        best = results.order[:, :1].unsqueeze(-1).unsqueeze(-1).expand(
            (-1, -1, self._horizon, self._act_dim))
        best_act_seq = action_sequence.leaf_apply(
            lambda x: torch.gather(x, 1, best))
        best_initial_act = best_act_seq.leaf_apply(
            lambda x: x[:, 0, 0])  # where x is (B, Pop, H ...)

        return AttrDict(act=best_initial_act,
                        action_sequence=action_sequence,
                        results=results)
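
# Standalone sketch of the elite selection above: argsort the per-candidate
# costs within each batch element, then gather the matching action sequences
# with an index expanded to the action tensor's shape.
import torch

B, P, H, A = 2, 5, 3, 2
costs = torch.randn(B, P)
acts = torch.randn(B, P, H, A)
order = torch.argsort(costs, dim=1)  # lowest cost first
best = order[:, :1].unsqueeze(-1).unsqueeze(-1).expand(-1, -1, H, A)
best_seq = torch.gather(acts, 1, best)  # (B, 1, H, A)
print(best_seq.shape)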
    def _load_mat(self):
        local_dict = AttrDict()

        if self._input_file is None:
            local_dict = self._env_spec.get_zeros(self._all_names, 0)  # np
        else:
            logger.debug('Loading ' + self._input_file)
            samples = scipy.io.loadmat(self._input_file)

            # split into chunks by episode. dict = {key: list of [Ni, key_shape]}
            data_dict = split_data_by_episodes(samples,
                                               horizon=self._planning_horizon,
                                               n_obs=self._obs_history_length,
                                               n_acs=self._acs_history_length)

            self._mu_obs = data_dict['mu_obs']
            self._sigma_obs = data_dict['sigma_obs']
            self._mu_delta_obs = data_dict['mu_delta_obs']
            self._sigma_delta_obs = data_dict['sigma_delta_obs']

            self._action_sequences = np.concatenate(
                data_dict['act_seq'],
                axis=0).astype(self._env_spec.names_to_dtypes['act'])

            split_indices = np.cumsum(data_dict['episode_sizes'])
            # remove the last chunk since it will be empty
            split_indices = np.delete(split_indices, -1)
            if self._split_indices.size > 0:
                self._split_indices = np.concatenate(
                    [self._split_indices,
                     np.array([self._data_len]),
                     self._data_len + split_indices],
                    axis=0)
            else:
                self._split_indices = split_indices

            self._num_episodes += len(data_dict['done'])
            self._data_len += np.sum(data_dict['episode_sizes'])
            logger.debug('Dataset length: {}'.format(self._data_len))

            for key in self._all_names:
                assert key in data_dict, f'{key} not in converted mat file'
                assert len(data_dict[key]) > 0
                # turn list into np array with the correct type
                local_dict[key] = np.concatenate(
                    data_dict[key],
                    axis=0).astype(self._env_spec.names_to_dtypes[key])
                assert local_dict[key].shape[1:] == self._env_spec.names_to_shapes[key], \
                    "Bad Data shape for {}: {}, requires {}" \
                        .format(key, local_dict[key].shape[1:], self._env_spec.names_to_shapes[key])
                # print(key, self._env_spec.names_to_shapes[key], local_dict[key].shape)
                assert local_dict[key].shape[0] == self._data_len, \
                    "Bad datalen for {}: {}, requires {}".format(key, local_dict[key].shape, self._data_len)

        return local_dict
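
# Standalone sketch of the cumsum split above: per-episode sizes become
# boundary indices for np.split, and the final boundary is dropped so the last
# chunk is not empty.
import numpy as np

episode_sizes = np.array([3, 2, 4])
split_indices = np.delete(np.cumsum(episode_sizes), -1)  # [3, 5]
chunks = np.split(np.arange(episode_sizes.sum()), split_indices)
print([c.tolist() for c in chunks])  # [[0, 1, 2], [3, 4], [5, 6, 7, 8]]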
Example #15
def eval_policy(dataset, save_file_name):
    b_size = dataset.batch_size
    d_size = len(dataset)

    obs_all = []
    goals_all = []
    output_actions = []
    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval policy" % (b, iters))
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))
        if args.random_goals:
            inputs, outputs = dataset.get_batch(indices=idxs,
                                                torch_device=model.device,
                                                get_horizon_goals=False)
            # this is to account for broadcasting to H+1 goals
            goals = env_spec.get_uniform(
                env_spec.goal_names, b_size,
                torch_device=model.device).unsqueeze(1)
        else:
            inputs, outputs, goals = dataset.get_batch(
                indices=idxs,
                torch_device=model.device,
                get_horizon_goals=True)

        # get obs batch
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act = policy.get_action(model, obs, goals, batch=True)

        goals_all.append(goals.leaf_apply(lambda v: to_numpy(v)))
        obs_all.append(obs.leaf_apply(lambda v: to_numpy(v)))
        output_actions.append(act.leaf_apply(lambda v: to_numpy(v)))

    # one big dictionary
    combined_obs = AttrDict.leaf_combine_and_apply(
        obs_all, lambda vs: np.concatenate(vs, axis=0))
    combined_goals = AttrDict.leaf_combine_and_apply(
        goals_all, lambda vs: np.concatenate(vs, axis=0))
    combined_output_actions = AttrDict.leaf_combine_and_apply(
        output_actions, lambda vs: np.concatenate(vs, axis=0))

    combined_obs.combine(combined_goals)
    combined_obs.combine(combined_output_actions)

    logger.debug("Saving Action Sequences")
    savemat(save_file_name, combined_obs)
Example #16
    def get_action(self, model, observation, goal, batch=False):
        """Optimizes the cost function provided in setup().
        Arguments:
            model: must be callable(action_sequence, observation, goal) and return cost (torch array)
                    where action is at AttrDict consisting of keys only in self.action_names
            observation: {}
            goal: {goal_obs} where goal_obs must be N x H+1 x ...
            batch:

        Returns:
            AttrDict with {action_sequence, results {costs, order} }
        """
        # generate random sequence
        batch_size = goal.goal_obs.shape[0]  # requires goal_obs to be a key
        device = goal.goal_obs.device

        if not batch:
            observation = observation.leaf_apply(lambda arr: arr.unsqueeze(
                0).repeat_interleave(self._pop_size, dim=0))
            goal = goal.leaf_apply(lambda arr: arr.unsqueeze(0).
                                   repeat_interleave(self._pop_size, dim=0))
        else:
            observation = observation.leaf_apply(
                lambda arr: arr.repeat_interleave(self._pop_size, dim=0))
            goal = goal.leaf_apply(
                lambda arr: arr.repeat_interleave(self._pop_size, dim=0))

        action_sequence = self._env_spec.get_uniform(
            self._action_names,
            batch_size=batch_size * self._pop_size * self._horizon)
        action_sequence.leaf_modify(lambda x: split_dim(
            torch.from_numpy(x).to(device),
            dim=0,
            new_shape=[batch_size * self._pop_size, self._horizon]))

        def resample_and_flatten(vs):
            old_acseq = vs[0]
            mean, std = vs[1], vs[2]
            sample = torch.randn_like(old_acseq) * std + mean
            d = AttrDict(act=sample)
            self._env_spec.clip(d, ['act'])
            return d.act.view([-1] + list(old_acseq.shape[2:]))

        best_initial_act = None
        results = None
        for i in range(self._max_iters):
            # run the model
            results = model(action_sequence, observation, goal)

            # view as (B, Pop, ...)
            action_sequence.leaf_modify(
                lambda x: split_dim(x, 0, [batch_size, self._pop_size]))
            results.leaf_modify(
                lambda x: split_dim(x, 0, [batch_size, self._pop_size]))

            results.order = torch.argsort(results.costs,
                                          dim=1)  # lowest to highest

            best = results.order[:, :self._num_elites]
            best = best.unsqueeze(-1).unsqueeze(-1).expand(
                (-1, -1, self._horizon, self._act_dim))
            best_act_seq = action_sequence.leaf_apply(
                lambda x: torch.gather(x, 1, best))
            best_initial_act = best_act_seq.leaf_apply(
                lambda x: x[:, 0, 0])  # where x is (B, Pop, H ...)
            means = best_act_seq.leaf_apply(lambda x: x.mean(1, keepdim=True))
            stds = best_act_seq.leaf_apply(lambda x: x.std(1, keepdim=True))

            if i < self._max_iters - 1:
                # resampling
                action_sequence = AttrDict.leaf_combine_and_apply(
                    [action_sequence, means, stds], resample_and_flatten)

        # leaves of best_initial_act are (B, actdim)
        best_initial_act.action_sequence = action_sequence  # (B, Pop, horizon, actdim)
        best_initial_act.results = results

        return best_initial_act
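
# Minimal standalone CEM loop on a toy quadratic cost, mirroring the
# sample -> rank -> fit elites -> resample structure above (every name here is
# local to this sketch):
import torch

pop, n_elites, iters, act_dim = 64, 8, 5, 2
target = torch.tensor([0.5, -0.3])
samples = torch.rand(pop, act_dim) * 2 - 1  # uniform init in [-1, 1]
for i in range(iters):
    costs = ((samples - target) ** 2).sum(-1)
    elites = samples[torch.argsort(costs)[:n_elites]]
    mean, std = elites.mean(0), elites.std(0)
    if i < iters - 1:
        samples = torch.randn(pop, act_dim) * std + mean  # resample around elites
print(mean)  # converges toward target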
 def get_output_stats(self):
     return AttrDict()
def eval_model(dataset, save_file_name):
    b_size = dataset.batch_size
    d_size = len(dataset)

    pred_trajectories = []
    action_sequences = []
    true_trajectories = []
    costs = []

    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval model" % (b, iters))
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))
        inputs, outputs, goals = dataset.get_batch(indices=idxs,
                                                   torch_device=model.device,
                                                   get_horizon_goals=True,
                                                   get_action_seq=True)

        # get obs batch
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act_seq = AttrDict()
        act_seq['act'] = inputs['act_seq']

        model.eval()
        all_obs, all_mouts = rollout(env_spec, model, obs, act_seq,
                                     policy._advance_obs_fn)

        # unsqueeze a time dim on each per-step output, then concat along it
        all_obs = AttrDict.leaf_combine_and_apply(
            all_obs,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))
        all_mouts = AttrDict.leaf_combine_and_apply(
            all_mouts,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))

        cost_dict = AttrDict(
            {'costs': policy._cost_fn(all_obs, goals, act_seq, all_mouts)})

        true_trajectories.append(goals.leaf_apply(lambda v: to_numpy(v)))
        pred_trajectories.append(all_obs.leaf_apply(lambda v: to_numpy(v)))
        action_sequences.append(act_seq.leaf_apply(lambda v: to_numpy(v)))
        costs.append(cost_dict.leaf_apply(lambda v: to_numpy(v)))

    # one big dictionary
    final_dict = AttrDict.leaf_combine_and_apply(
        true_trajectories, lambda vs: np.concatenate(vs, axis=0))
    combined_pred = AttrDict.leaf_combine_and_apply(
        pred_trajectories, lambda vs: np.concatenate(vs, axis=0))
    combined_acts = AttrDict.leaf_combine_and_apply(
        action_sequences, lambda vs: np.concatenate(vs, axis=0))
    combined_costs = AttrDict.leaf_combine_and_apply(
        costs, lambda vs: np.concatenate(vs, axis=0))

    final_dict.combine(combined_pred)
    final_dict.combine(combined_acts)  # no overlapping keys
    final_dict.combine(combined_costs)

    logger.debug("Saving Model Trajectories")
    logger.debug("Keys: " + str(final_dict.keys()))
    savemat(save_file_name, final_dict)
 def get_goal(self):
     goal = AttrDict(goal_obs=np.tile(self._curr_goal_pos[None, None], (1, self.horizon + 1, 1)))
     return self._env_spec.map_to_types(goal)
 def get_obs(self):
     obs = AttrDict(obs=self._obs[None].copy(),
                    prev_obs=self._prev_obs[None].copy(),
                    prev_act=self._prev_act[None].copy(),
                    latent=-np.ones((1, 1)))  # -1 specifies online
     return self._env_spec.map_to_types(obs)