def get_grids(self, xy_range=None, z=0):
        """Evaluate ``self._get_output_for_ob`` on a 2-D grid, once per skill.

        Args:
            xy_range: Optional array indexed as ``xy_range[:, 0]`` (lower
                bounds) and ``xy_range[:, 1]`` (upper bounds) for the x and
                y axes. The values are divided by ``self._bin_width`` to get
                bin coordinates. When ``None``, the bounds are the
                element-wise min/max of the cell keys stored in
                ``self._counter`` across all skills.
            z: Unused. Kept for backward compatibility; a grid is always
                produced for every skill in ``range(self._num_skills)``.

        Returns:
            The per-skill grids stacked with ``np.array``. Each grid has
            shape ``(grid_size_x, grid_size_y)``, the integer-truncated
            extent of each axis in bin units. If no range is given and no
            cells have been recorded, every grid is empty (0 x 0).
        """
        if xy_range is not None:
            min_xy = xy_range[:, 0] / self._bin_width
            max_xy = xy_range[:, 1] / self._bin_width
        else:
            min_xy = np.full(2, np.inf)
            max_xy = np.full(2, -np.inf)
            found_cells = False
            for skill in range(self._num_skills):
                # NOTE(review): counter keys are parsed with eval(), which
                # executes arbitrary expressions. This is only safe if the
                # keys are generated internally; if they are plain
                # tuple/list literals, ast.literal_eval would be a safer
                # drop-in -- confirm the key format before switching.
                cells = np.array(
                    [eval(key) for key in self._counter[skill].keys()])
                if len(cells) > 0:
                    found_cells = True
                    max_xy = np.max([max_xy, np.max(cells, axis=0)], axis=0)
                    min_xy = np.min([min_xy, np.min(cells, axis=0)], axis=0)
            if not found_cells:
                # Nothing recorded yet: the bounds are still +/-inf and
                # int(inf) would raise OverflowError below. Collapse to a
                # zero-extent range so the result is a set of empty grids.
                min_xy = np.zeros(2)
                max_xy = np.zeros(2)

        grid_size_x = int(max_xy[0] - min_xy[0])
        grid_size_y = int(max_xy[1] - min_xy[1])
        # The sample coordinates do not depend on the skill or on the other
        # axis, so compute them once instead of once per loop iteration.
        xs = np.linspace(min_xy[0], max_xy[0], grid_size_x)
        ys = np.linspace(min_xy[1], max_xy[1], grid_size_y)

        grids = []
        for skill in range(self._num_skills):
            grid = np.zeros((grid_size_x, grid_size_y))
            for i, x in enumerate(xs):
                for j, y in enumerate(ys):
                    # Convert bin coordinates back to observation units
                    # before augmenting with the skill.
                    ob = np.array([x, y]) * self._bin_width
                    ob = utils.concat_ob_z(ob, skill, self._num_skills)
                    grid[i, j] = self._get_output_for_ob(ob)
            grids.append(grid)

        return np.array(grids)
 def get_grids_1d(self, x_range, z=0):
     """Evaluate ``self._get_output_for_ob`` along a 1-D line, once per skill.

     Args:
         x_range: Array whose first two entries are the lower and upper
             bound of the line. It is divided by ``self._bin_width`` to get
             bin coordinates.
         z: Not used by the computation; every skill in
             ``range(self._num_skills)`` is evaluated.

     Returns:
         A pair ``(grids, x_grids)``. ``grids`` stacks one row of outputs
         per skill; ``x_grids`` stacks the matching sample positions scaled
         back by ``self._bin_width``.
     """
     scaled_range = x_range / self._bin_width
     start, stop = scaled_range[0], scaled_range[1]
     num_points = int(stop - start)

     outputs_per_skill = []
     positions_per_skill = []
     for skill in range(self._num_skills):
         outputs = np.zeros(num_points)
         bin_positions = np.linspace(start, stop, num_points)
         for idx, bin_x in enumerate(bin_positions):
             ob = np.array([bin_x]) * self._bin_width
             ob = utils.concat_ob_z(ob, skill, self._num_skills)
             outputs[idx] = self._get_output_for_ob(ob)
         outputs_per_skill.append(outputs)
         # Sample positions expressed back in observation units.
         positions_per_skill.append(bin_positions * self._bin_width)

     return np.array(outputs_per_skill), np.array(positions_per_skill)
 def get_action(polself, ob):
     """Return the wrapped policy's action for ``ob`` combined with the skill.

     The observation is passed through ``utils.concat_ob_z`` together with
     ``polself._z`` and ``polself._num_skills``; the result is handed to
     ``polself._policy.get_action``, whose return value is passed through
     unchanged.
     """
     skill_conditioned_ob = utils.concat_ob_z(
         ob, polself._z, polself._num_skills)
     policy = polself._policy
     return policy.get_action(skill_conditioned_ob)
Beispiel #4
0
 def _proc_observation(self, ob, z=None):
     """Combine ``ob`` with a skill via ``utils.concat_ob_z``.

     Args:
         ob: Observation to augment.
         z: Skill to use. When ``None``, ``self._current_rollout_z`` is
             used instead.

     Returns:
         Whatever ``utils.concat_ob_z(ob, z, self.num_skills)`` returns.
     """
     skill = self._current_rollout_z if z is None else z
     return utils.concat_ob_z(ob, skill, self.num_skills)
Beispiel #5
0
    def _process_pos_statistics(self,
                                z_to_pos_list,
                                pos_axis=None,
                                histogram_bin_width=0.05,
                                statistics=None,
                                name='pos',
                                compute_kl=False):
        """Fit position histograms and optionally record summary statistics.

        Two ``DiscretizedDensity`` models are fitted on the given positions:
        one that ignores the skill (every position is tagged with z=0 and
        num_skills=1) and one conditioned on the skill z.

        Args:
          z_to_pos_list (defaultdict(list)): keyed by skill z; each value is
            the list of positions observed for that skill. Keys must support
            ``max(...) + 1``, which is used as the number of skills.
          pos_axis: Optional iterable of indices; when given, only those
            components of each position are kept.
          histogram_bin_width: Bin width passed to both density models.
          statistics (OrderedDict): Optional mapping that is updated in
            place. Pass ``None`` to skip statistics entirely; an empty
            mapping is populated like any other.
          name: Label inserted into the generated statistic keys.
          compute_kl: When true and ``self._goal_prior`` is not ``None``,
            an additional KL statistic is recorded.

        Returns:
          Tuple ``(histogram, histogram_z)`` of the two fitted models.
        """
        # Fit a histogram density model.
        num_skills = max(z_to_pos_list.keys()) + 1
        histogram = DiscretizedDensity(num_skills=1,
                                       bin_width=histogram_bin_width)
        histogram_z = DiscretizedDensity(num_skills=num_skills,
                                         bin_width=histogram_bin_width)
        obs_list = []
        obs_z_list = []
        pos_list = []
        for z, pos_list_z in z_to_pos_list.items():
            for pos in pos_list_z:
                if pos_axis is not None:
                    pos = [pos[i] for i in pos_axis]
                obs_list.append(concat_ob_z(pos, z=0, num_skills=1))
                obs_z_list.append(concat_ob_z(pos, z=z, num_skills=num_skills))
                pos_list.append(pos)
        obs_list = ptu.from_numpy(np.array(obs_list))
        obs_z_list = ptu.from_numpy(np.array(obs_z_list))

        histogram.update(obs_list)
        histogram_z.update(obs_z_list)

        # Test against None rather than truthiness: an empty OrderedDict is
        # falsy, so a freshly created mapping would otherwise never be
        # filled. Skip when there are no positions, because the reductions
        # below are undefined on empty input.
        if statistics is not None and pos_list:
            pos_entropy = -1 * ptu.get_numpy(
                histogram.get_output_for(obs_list))
            pos_entropy_mean = np.mean(pos_entropy, axis=0)[0]
            pos_entropy_std = np.std(pos_entropy, axis=0)[0]
            statistics['ManipulationEnv.{}_H[s]_mean'.format(
                name)] = pos_entropy_mean
            statistics['ManipulationEnv.{}_H[s]_std'.format(
                name)] = pos_entropy_std

            # Compute KL[pi(s) | p*(s)].
            if compute_kl and self._goal_prior is not None:
                kl_terms = []
                for i, pos in enumerate(pos_list):
                    # Ignore off-table object positions.
                    if not ManipulationEnv.is_off_table(pos):
                        # NOTE(review): pos_entropy is the *negated*
                        # histogram output, yet it is used here as log_pi,
                        # and the term is log_p - log_pi. Confirm the sign
                        # convention against get_output_for before relying
                        # on this value.
                        log_pi = pos_entropy[i]
                        log_p = self._compute_reward_object_goal_indicator(pos)
                        kl_terms.append(log_p - log_pi)
                # The key has no placeholder, so it does not depend on name.
                statistics[
                    'ManipulationEnv.KL[pi(object_pos)||p*(object_pos)]'
                ] = np.mean(kl_terms)

            pos_list = np.array(pos_list)
            pos_min = np.min(pos_list, axis=0)
            pos_max = np.max(pos_list, axis=0)
            pos_mean = np.mean(pos_list, axis=0)
            pos_std = np.std(pos_list, axis=0)
            # Use the real number of position components instead of a
            # hard-coded 3, so a pos_axis selecting fewer (or more) axes
            # neither raises IndexError nor drops components.
            num_axes = len(pos_min)
            for i in range(num_axes):
                statistics['ManipulationEnv.{}_min_axis{}'.format(
                    name, i)] = pos_min[i]
                statistics['ManipulationEnv.{}_max_axis{}'.format(
                    name, i)] = pos_max[i]
            for i in range(num_axes):
                statistics['ManipulationEnv.{}_mean_axis{}'.format(
                    name, i)] = pos_mean[i]
            for i in range(num_axes):
                statistics['ManipulationEnv.{}_std_axis{}'.format(
                    name, i)] = pos_std[i]
        return histogram, histogram_z