예제 #1
0
    def _get_classifier_feed_dict(self):
        """Assemble the feed dict for one classifier training batch.

        Negatives come from ``self.sampler.random_batch``; positives are
        rows of ``self._goal_examples`` picked with ``np.random.randint``
        (so rows may repeat). Per observation key the negatives are stacked
        first and the positives second, with labels 0 and 1 respectively.
        When ``self._mixup_alpha > 0`` both observations and labels are
        passed through ``mixup``.

        Returns:
            dict: maps each observation placeholder (one per key in
            ``self._classifier.observation_keys``) and the labels
            placeholder to its batch array.
        """
        batch_size = self._classifier_batch_size
        negatives = self.sampler.random_batch(batch_size)['observations']

        # Every goal-example array is indexed with the same random rows; the
        # pool size is read from whichever entry iterates first.
        pool_size = self._goal_examples[
            next(iter(self._goal_examples))].shape[0]
        rand_positive_ind = np.random.randint(pool_size, size=batch_size)
        positives = {
            key: values[rand_positive_ind]
            for key, values in self._goal_examples.items()
        }

        # First half of the batch (negatives) keeps label 0, second half
        # (positives) gets label 1.
        labels_batch = np.zeros((2 * batch_size, 1))
        labels_batch[batch_size:] = 1.0

        observations_batch = {
            key: np.concatenate((negatives[key], positives[key]), axis=0)
            for key in self._classifier.observation_keys
        }

        if self._mixup_alpha > 0:
            # Rebind `observations_batch` itself: the feed dict below reads
            # this name, so the mixed observations must replace it to stay
            # paired with the mixed labels.
            observations_batch, labels_batch = mixup(
                observations_batch, labels_batch, alpha=self._mixup_alpha)

        feed_dict = {
            **{
                self._placeholders['observations'][key]:
                observations_batch[key]
                for key in self._classifier.observation_keys
            },
            self._placeholders['labels']: labels_batch
        }

        return feed_dict
예제 #2
0
    def _get_classifier_feed_dict(self):
        """Build a classifier feed dict from the stored example pools.

        Rows are picked with ``np.random.randint`` from
        ``self._goal_examples`` (positives) and ``self._negative_examples``
        (negatives). The batch is negatives followed by positives, with
        labels 0 for the first half and 1 for the second. When
        ``self._mixup_alpha > 0`` the batch and labels go through ``mixup``.

        Returns:
            dict: observation placeholders (one per key in
            ``self._classifier.observation_keys``) plus the labels
            placeholder, mapped to their batch values.
        """
        batch_size = self._classifier_batch_size
        goal_pool = self._goal_examples
        negative_pool = self._negative_examples

        # Positive indices are drawn before negative ones.
        goal_rows = np.random.randint(goal_pool.shape[0], size=batch_size)
        negative_rows = np.random.randint(
            negative_pool.shape[0], size=batch_size)

        observation_batch = np.concatenate(
            [negative_pool[negative_rows], goal_pool[goal_rows]], axis=0)

        positive_half = slice(batch_size, None)
        labels_batch = np.zeros((2 * batch_size, 1))
        labels_batch[positive_half] = 1.0

        if self._mixup_alpha > 0:
            observation_batch, labels_batch = mixup(
                observation_batch, labels_batch, alpha=self._mixup_alpha)

        # NOTE(review): `observation_batch` is the result of np.concatenate
        # yet is indexed by observation key name below; that only works if
        # the pools are structured arrays -- confirm against the caller.
        feed_dict = {
            self._placeholders['observations'][name]: observation_batch[name]
            for name in self._classifier.observation_keys
        }
        feed_dict[self._placeholders['labels']] = labels_batch

        return feed_dict
예제 #3
0
    def _get_classifier_feed_dict(self):
        """Build the feed dict for a two-column one-hot classifier batch.

        Negatives come from ``self.sampler.random_batch``; positives are
        rows of ``self._goal_examples`` picked with ``np.random.randint``.
        The batch is negatives followed by positives. When
        ``self._image_only`` is set, trailing columns of the batch are
        overwritten with zeros. When ``self._mixup_alpha > 0`` the batch and
        labels go through ``mixup``.

        Returns:
            dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
            the observation and label batches.

        Raises:
            NotImplementedError: if ``self._image_only`` is set and the
                observation width is not 3090.
        """
        batch_size = self._classifier_batch_size
        negatives = self.sampler.random_batch(batch_size)['observations']

        goal_rows = np.random.randint(
            self._goal_examples.shape[0], size=batch_size)
        positives = self._goal_examples[goal_rows]

        # One-hot int32 labels: `batch_size` rows of [1 0] for the negatives
        # followed by `batch_size` rows of [0 1] for the goal examples.
        labels_batch = np.repeat(
            np.eye(2, dtype=np.int32), batch_size, axis=0)

        observation_batch = np.concatenate([negatives, positives], axis=0)

        if self._image_only:
            if observation_batch.shape[1] != 3090:
                raise NotImplementedError
            image_dim = 48 * 48 * 3
            # Zero every column from `image_dim` onward, leaving only the
            # leading columns intact.
            # NOTE(review): image_dim is 6912, larger than the only accepted
            # width (3090), so this slice selects no columns -- confirm the
            # intended image size.
            observation_batch[:, image_dim:] = 0.0

        if self._mixup_alpha > 0:
            observation_batch, labels_batch = mixup(
                observation_batch, labels_batch, alpha=self._mixup_alpha)

        return {
            self._observations_ph: observation_batch,
            self._label_ph: labels_batch
        }
    def _get_classifier_feed_dict(self):
        """Build a feed dict whose positives are derived from the negatives.

        Negatives come from ``self.sampler.random_batch``. Each positive row
        is the trailing half of the matching negative row repeated twice
        (per the assertion message, rows are a state concatenated with a
        goal, so the positive is the goal paired with itself). Labels are
        one-hot: ``[1, 0]`` for negatives, ``[0, 1]`` for positives. When
        ``self._mixup_alpha > 0`` the batch and labels go through ``mixup``.

        Returns:
            dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
            the observation and label batches.
        """
        batch_size = self._classifier_batch_size
        negatives = self.sampler.random_batch(batch_size)['observations']

        state_goal_size = negatives.shape[1]
        assert state_goal_size % 2 == 0, (
            "States and goals should be concatenated together,"
            " so the total space has to be even.")

        half_width = state_goal_size // 2
        trailing_half = negatives[:, half_width:]
        positives = np.concatenate((trailing_half, trailing_half), axis=1)

        observation_batch = np.concatenate([negatives, positives], axis=0)

        negative_rows = slice(None, batch_size)
        positive_rows = slice(batch_size, None)
        labels_batch = np.zeros((2 * batch_size, 2), dtype=np.int32)
        labels_batch[negative_rows, 0] = 1
        labels_batch[positive_rows, 1] = 1

        if self._mixup_alpha > 0:
            observation_batch, labels_batch = mixup(
                observation_batch, labels_batch, alpha=self._mixup_alpha)

        return {
            self._observations_ph: observation_batch,
            self._label_ph: labels_batch
        }
    def _get_classifier_feed_dict(self):
        """Build a feed dict containing observations, actions and labels.

        Negatives (observations and actions) come from
        ``self.sampler.random_batch``; positives are rows of
        ``self._goal_examples`` picked with ``np.random.randint``, using the
        same row indices for observations and actions. Negatives are stacked
        first, positives second, with one-hot labels ``[1, 0]`` / ``[0, 1]``.
        When ``self._mixup_alpha > 0`` observations and labels go through
        ``mixup`` and the actions are reordered with the permutation it
        returns.

        Returns:
            dict: observation placeholders (one per key in
            ``self._policy.observation_keys``), the actions placeholder and
            the labels placeholder, mapped to their batch arrays.
        """
        batch_size = self._classifier_batch_size
        sampled = self.sampler.random_batch(batch_size)
        negatives_obs = sampled['observations']
        negatives_act = sampled['actions']

        goal_observations = self._goal_examples['observations']
        pool_size = goal_observations[next(iter(goal_observations))].shape[0]
        goal_rows = np.random.randint(pool_size, size=batch_size)

        positives_obs = {
            key: values[goal_rows]
            for key, values in goal_observations.items()
        }
        positives_act = self._goal_examples['actions'][goal_rows]

        # `batch_size` rows of [1 0] (negatives) then `batch_size` rows of
        # [0 1] (positives), as int32.
        labels_batch = np.repeat(
            np.eye(2, dtype=np.int32), batch_size, axis=0)

        observation_keys = self._policy.observation_keys
        observations_batch = {
            key: np.concatenate(
                (negatives_obs[key], positives_obs[key]), axis=0)
            for key in observation_keys
        }
        actions_batch = np.concatenate([negatives_act, positives_act], axis=0)

        if self._mixup_alpha > 0:
            observations_batch, labels_batch, permutation_idx = mixup(
                observations_batch,
                labels_batch,
                alpha=self._mixup_alpha,
                return_permutation=True)
            actions_batch = actions_batch[permutation_idx]

        feed_dict = {
            self._placeholders['observations'][key]: observations_batch[key]
            for key in self._policy.observation_keys
        }
        feed_dict[self._placeholders['actions']] = actions_batch
        feed_dict[self._placeholders['labels']] = labels_batch

        return feed_dict
예제 #6
0
    def _get_classifier_feed_dict(self):
        """Build a classifier feed dict with positives made from negatives.

        Negatives come from ``self.sampler.random_batch``. For every key in
        ``self._classifier.observation_keys`` the positive example is built
        by taking the entries from index 3 onward along the last (4th) axis
        of the negative and concatenating that slice with itself along the
        same axis. Negatives are stacked first (label 0), positives second
        (label 1). When ``self._mixup_alpha > 0`` the batch and labels go
        through ``mixup``.

        Returns:
            dict: observation placeholders (one per classifier observation
            key) plus the labels placeholder, mapped to their batch arrays.
        """
        batch_size = self._classifier_batch_size
        negatives = self.sampler.random_batch(batch_size)['observations']

        # NOTE(review): presumably each observation stacks a 3-channel
        # current image with the goal image on the channel axis, so `3:` is
        # the goal part and the positive is "goal paired with goal" --
        # confirm the channel layout against the environment.
        positives = {}
        for key in self._classifier.observation_keys:
            trailing_channels = negatives[key][:, :, :, 3:]
            positives[key] = np.concatenate(
                [trailing_channels, trailing_channels], axis=3)

        labels_batch = np.zeros((2 * batch_size, 1))
        labels_batch[batch_size:] = 1.0

        observation_batch = {
            key: np.concatenate((negatives[key], positives[key]), axis=0)
            for key in self._classifier.observation_keys
        }

        if self._mixup_alpha > 0:
            observation_batch, labels_batch = mixup(observation_batch,
                                                    labels_batch,
                                                    alpha=self._mixup_alpha)

        feed_dict = {
            **{
                self._placeholders['observations'][key]: observation_batch[key]
                for key in self._classifier.observation_keys
            },
            self._placeholders['labels']: labels_batch
        }

        return feed_dict
예제 #7
0
    def _get_classifier_feed_dict(self):
        """Build the feed dict for a two-column one-hot classifier batch.

        Negatives come from ``self.sampler.random_batch``; positives are
        rows of ``self._goal_examples`` picked with ``np.random.randint``.
        The batch is negatives followed by positives, labelled ``[1, 0]``
        and ``[0, 1]`` respectively. When ``self._mixup_alpha > 0`` the
        batch and labels go through ``mixup``.

        Returns:
            dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
            the observation and label batches.
        """
        batch_size = self._classifier_batch_size
        negatives = self.sampler.random_batch(batch_size)['observations']

        goal_rows = np.random.randint(
            self._goal_examples.shape[0], size=batch_size)
        positives = self._goal_examples[goal_rows]

        # `batch_size` rows of [1 0] followed by `batch_size` rows of [0 1].
        labels_batch = np.repeat(
            np.eye(2, dtype=np.int32), batch_size, axis=0)
        observation_batch = np.concatenate([negatives, positives], axis=0)

        if self._mixup_alpha > 0:
            observation_batch, labels_batch = mixup(
                observation_batch, labels_batch, alpha=self._mixup_alpha)

        return {
            self._observations_ph: observation_batch,
            self._label_ph: labels_batch
        }
예제 #8
0
    def _get_classifier_feed_dict(self):
        """Build a classifier feed dict from the stored example pools.

        Rows are picked with ``np.random.randint`` from
        ``self._goal_examples`` (positives) and ``self._negative_examples``
        (negatives). The batch is negatives followed by positives, with
        label 0 for the first half and 1 for the second. When
        ``self._mixup_alpha > 0`` the batch and labels go through ``mixup``.

        Returns:
            dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
            the observation and label batches.
        """
        batch_size = self._classifier_batch_size
        goal_pool = self._goal_examples
        negative_pool = self._negative_examples

        # Positive indices are drawn before negative ones.
        goal_rows = np.random.randint(goal_pool.shape[0], size=batch_size)
        negative_rows = np.random.randint(
            negative_pool.shape[0], size=batch_size)

        observation_batch = np.concatenate(
            [negative_pool[negative_rows], goal_pool[goal_rows]], axis=0)

        positive_half = slice(batch_size, None)
        labels_batch = np.zeros((2 * batch_size, 1))
        labels_batch[positive_half] = 1.0

        if self._mixup_alpha > 0:
            observation_batch, labels_batch = mixup(
                observation_batch, labels_batch, alpha=self._mixup_alpha)

        return {
            self._observations_ph: observation_batch,
            self._label_ph: labels_batch,
        }
예제 #9
0
    def _get_classifier_feed_dicts(self):
        """Build one classifier feed dict per goal (goal 0 and goal 1).

        A batch of ``2 * self._classifier_batch_size`` negatives is drawn
        from ``self.sampler`` and split by the value of its ``'goal_index'``
        entry, so the number of negatives per goal varies from call to call.
        Positives for each goal are ``self._classifier_batch_size`` rows
        picked with ``np.random.randint`` from ``self._goal_examples_0`` /
        ``self._goal_examples_1``. In each batch the negatives come first
        (label 0) and the positives second (label 1). When
        ``self._mixup_alpha > 0`` each batch and its labels go through
        ``mixup``.

        Returns:
            tuple: ``(feed_dict_0, feed_dict_1)``. Both use the same
            placeholders from ``self._placeholders``.
        """
        batch_size = self._classifier_batch_size

        # Sample twice the usual amount, then keep only the rows that belong
        # to each goal.
        negatives = self.sampler.random_batch(2 * batch_size)['observations']

        negative_ind_0 = (negatives['goal_index'] == 0).flatten()
        negative_ind_1 = (negatives['goal_index'] == 1).flatten()
        n_negatives_0 = np.sum(negative_ind_0.astype(int))
        n_negatives_1 = np.sum(negative_ind_1.astype(int))
        negatives_0 = {
            key: values[negative_ind_0]
            for key, values in negatives.items()
        }
        negatives_1 = {
            key: values[negative_ind_1]
            for key, values in negatives.items()
        }

        # Positives are drawn independently from each goal's example pool.
        rand_positive_ind_0 = np.random.randint(
            self._goal_examples_0[next(iter(self._goal_examples_0))].shape[0],
            size=batch_size)
        positives_0 = {
            key: values[rand_positive_ind_0]
            for key, values in self._goal_examples_0.items()
        }

        rand_positive_ind_1 = np.random.randint(
            self._goal_examples_1[next(iter(self._goal_examples_1))].shape[0],
            size=batch_size)
        positives_1 = {
            key: values[rand_positive_ind_1]
            for key, values in self._goal_examples_1.items()
        }

        # Label lengths follow the actual negative counts, which depend on
        # how the sampled rows split between the two goals.
        labels_batch_0 = np.zeros((n_negatives_0 + batch_size, 1))
        labels_batch_0[n_negatives_0:] = 1.0
        labels_batch_1 = np.zeros((n_negatives_1 + batch_size, 1))
        labels_batch_1[n_negatives_1:] = 1.0

        observations_batch_0 = {
            key: np.concatenate((negatives_0[key], positives_0[key]), axis=0)
            for key in self._classifier_0.observation_keys
        }
        observations_batch_1 = {
            key: np.concatenate((negatives_1[key], positives_1[key]), axis=0)
            for key in self._classifier_1.observation_keys
        }

        if self._mixup_alpha > 0:
            # Rebind the `observations_batch_*` names that the feed dicts
            # read, so mixed observations stay paired with mixed labels.
            observations_batch_0, labels_batch_0 = mixup(
                observations_batch_0, labels_batch_0, alpha=self._mixup_alpha)
            observations_batch_1, labels_batch_1 = mixup(
                observations_batch_1, labels_batch_1, alpha=self._mixup_alpha)

        feed_dict_0 = {
            **{
                self._placeholders['observations'][key]:
                observations_batch_0[key]
                for key in self._classifier_0.observation_keys
            },
            self._placeholders['labels']: labels_batch_0
        }

        feed_dict_1 = {
            **{
                self._placeholders['observations'][key]:
                observations_batch_1[key]
                for key in self._classifier_1.observation_keys
            },
            self._placeholders['labels']: labels_batch_1
        }

        return feed_dict_0, feed_dict_1
예제 #10
0
    def _get_classifier_feed_dict(self):
        """Build a classifier feed dict, optionally adding novel positives.

        Negatives come from ``self.sampler.random_batch``; positives are
        rows of ``self._goal_examples`` picked with ``np.random.randint``.
        When ``self._positive_on_first_occurence`` is set, each sampled
        negative is discretized with the training environment's
        ``_discretize_observation``; rows whose cell is not yet marked in
        ``self._seen_states`` are marked there and appended to the
        positives as well (they also stay among the negatives). Labels are
        one-hot: ``[1, 0]`` for the first ``batch_size`` rows, ``[0, 1]``
        for the rest. When ``self._mixup_alpha > 0`` the batch and labels go
        through ``mixup``.

        Side effects:
            May set entries of ``self._seen_states`` to ``True``.

        Returns:
            dict: observation placeholders (one per key in
            ``self._policy.observation_keys``) plus the labels placeholder,
            mapped to their batch arrays.
        """
        batch_size = self._classifier_batch_size
        add_first_occurrences = self._positive_on_first_occurence

        negatives = self.sampler.random_batch(batch_size)['observations']

        # Row indices of negatives that land in a not-yet-seen cell. Kept as
        # a plain list so that an empty result still indexes to zero rows.
        first_occ_idxs = []
        if add_first_occurrences:
            env = self._training_environment.unwrapped
            n_sampled = len(negatives[next(iter(negatives))])
            for row in range(n_sampled):
                single_observation = {
                    key: val[row]
                    for key, val in negatives.items()
                }
                x_d, y_d = env._discretize_observation(single_observation)
                if self._seen_states[x_d][y_d]:
                    continue
                first_occ_idxs.append(row)
                self._seen_states[x_d][y_d] = True

        pool_size = self._goal_examples[
            next(iter(self._goal_examples))].shape[0]
        goal_rows = np.random.randint(pool_size, size=batch_size)
        positives = {
            key: values[goal_rows]
            for key, values in self._goal_examples.items()
        }
        if add_first_occurrences:
            positives = {
                key: np.concatenate(
                    [val, negatives[key][first_occ_idxs]], axis=0)
                for key, val in positives.items()
            }

        # `first_occ_idxs` is empty when the option is off, which gives the
        # plain 2 * batch_size layout.
        n_positives = batch_size + len(first_occ_idxs)
        labels_batch = np.zeros((batch_size + n_positives, 2), dtype=np.int32)
        labels_batch[:batch_size, 0] = 1
        labels_batch[batch_size:, 1] = 1

        observations_batch = {
            key: np.concatenate((negatives[key], positives[key]), axis=0)
            for key in self._policy.observation_keys
        }

        if self._mixup_alpha > 0:
            observations_batch, labels_batch = mixup(
                observations_batch, labels_batch, alpha=self._mixup_alpha)

        feed_dict = {
            self._placeholders['observations'][key]: observations_batch[key]
            for key in self._policy.observation_keys
        }
        feed_dict[self._placeholders['labels']] = labels_batch

        return feed_dict
예제 #11
0
    def _get_classifier_feed_dicts(self):
        """Build one classifier feed dict per goal.

        ``self._num_goals * self._classifier_batch_size`` negatives are
        drawn from ``self.sampler`` and split by the value of their
        ``'goal_index'`` entry. For each pool in
        ``self._goal_example_pools``, ``self._classifier_batch_size``
        positive rows are picked with ``np.random.randint``. Per goal the
        negatives are stacked first (label 0) and the positives second
        (label 1). When ``self._mixup_alpha > 0`` every goal's batch and
        labels go through ``mixup``.

        Returns:
            list: one feed dict per goal, all keyed by the same placeholders
            from ``self._placeholders`` and using the observation keys of
            ``self._classifiers[0]``.
        """
        batch_size = self._classifier_batch_size
        num_goals = self._num_goals

        negatives = self.sampler.random_batch(
            num_goals * batch_size)['observations']

        # Boolean row masks selecting the sampled observations of each goal.
        # TODO: Make it split based on the goal qpos
        goal_index = negatives['goal_index']
        negative_inds = [
            (goal_index == goal).flatten() for goal in range(num_goals)
        ]
        negatives_per_goal = [
            {key: values[mask] for key, values in negatives.items()}
            for mask in negative_inds
        ]

        # Pick positive rows for every goal pool, in pool order.
        rand_positive_indices = []
        for pool in self._goal_example_pools:
            pool_size = pool[next(iter(pool.keys()))].shape[0]
            rand_positive_indices.append(
                np.random.randint(pool_size, size=batch_size))
        positives_per_goal = [
            {key: values[rows] for key, values in pool.items()}
            for rows, pool in zip(
                rand_positive_indices, self._goal_example_pools)
        ]

        # Zeros for however many negatives each goal received, then ones for
        # the fixed number of positives.
        labels_batches = [
            np.concatenate([
                np.zeros((np.sum(negative_inds[goal].astype(int)), 1)),
                np.ones((batch_size, 1)),
            ])
            for goal in range(num_goals)
        ]

        observation_keys = self._classifiers[0].observation_keys
        observation_batches = [
            {
                key: np.concatenate(
                    (goal_negatives[key], goal_positives[key]), axis=0)
                for key in observation_keys
            }
            for goal_negatives, goal_positives in zip(
                negatives_per_goal, positives_per_goal)
        ]

        if self._mixup_alpha > 0:
            for goal in range(self._num_goals):
                mixed_observations, mixed_labels = mixup(
                    observation_batches[goal],
                    labels_batches[goal],
                    alpha=self._mixup_alpha)
                observation_batches[goal] = mixed_observations
                labels_batches[goal] = mixed_labels

        feed_dicts = []
        for observations_batch, labels_batch in zip(
                observation_batches, labels_batches):
            feed_dict = {
                self._placeholders['observations'][key]:
                observations_batch[key]
                for key in self._classifiers[0].observation_keys
            }
            feed_dict[self._placeholders['labels']] = labels_batch
            feed_dicts.append(feed_dict)

        return feed_dicts