def _get_classifier_feed_dict(self):
    """Build the feed dict for one classifier training step.

    Negatives are the observations of a batch drawn from ``self.sampler``;
    positives are rows drawn uniformly (with replacement) from
    ``self._goal_examples``. The first ``self._classifier_batch_size`` rows
    of the batch are negatives (label 0.0), the remaining rows are
    positives (label 1.0).

    Returns:
        dict: maps each observation placeholder (one per key in
        ``self._classifier.observation_keys``) and the labels placeholder
        to the corresponding batch array.
    """
    batch_size = self._classifier_batch_size
    negatives = self.sampler.random_batch(batch_size)['observations']

    # All entries of the goal pool are indexed with the same rows so the
    # per-key values of one example stay aligned.
    pool_size = self._goal_examples[next(iter(self._goal_examples))].shape[0]
    rand_positive_ind = np.random.randint(pool_size, size=batch_size)
    positives = {
        key: values[rand_positive_ind]
        for key, values in self._goal_examples.items()
    }

    labels_batch = np.zeros((2 * batch_size, 1))
    labels_batch[batch_size:] = 1.0

    observations_batch = {
        key: np.concatenate((negatives[key], positives[key]), axis=0)
        for key in self._classifier.observation_keys
    }

    if self._mixup_alpha > 0:
        # The result must be bound back to `observations_batch`: the feed
        # dict below reads that name, and binding it to any other name
        # would feed mixed labels together with unmixed observations.
        observations_batch, labels_batch = mixup(
            observations_batch, labels_batch, alpha=self._mixup_alpha)

    feed_dict = {
        **{
            self._placeholders['observations'][key]: observations_batch[key]
            for key in self._classifier.observation_keys
        },
        self._placeholders['labels']: labels_batch,
    }

    return feed_dict
def _get_classifier_feed_dict(self):
    """Assemble a classifier training batch from fixed example pools.

    Draws ``self._classifier_batch_size`` rows (with replacement) from both
    ``self._goal_examples`` and ``self._negative_examples``. Negatives come
    first in the batch and are labelled 0.0; positives follow and are
    labelled 1.0.

    Returns:
        dict: observation placeholders (one per key in
        ``self._classifier.observation_keys``) and the labels placeholder
        mapped to their batch values.
    """
    n = self._classifier_batch_size

    # Positive indices are drawn before negative indices.
    pos_rows = np.random.randint(self._goal_examples.shape[0], size=n)
    neg_rows = np.random.randint(self._negative_examples.shape[0], size=n)
    goal_rows = self._goal_examples[pos_rows]
    negative_rows = self._negative_examples[neg_rows]

    labels = np.zeros((2 * n, 1))
    labels[n:] = 1.0

    batch = np.concatenate([negative_rows, goal_rows], axis=0)

    if self._mixup_alpha > 0:
        batch, labels = mixup(batch, labels, alpha=self._mixup_alpha)

    # NOTE(review): the batch is indexed by observation key here, so the
    # example pools presumably are structured arrays (or something else
    # that supports field lookup) - confirm against the caller.
    observation_feeds = {}
    for name in self._classifier.observation_keys:
        observation_feeds[self._placeholders['observations'][name]] = (
            batch[name])

    feed_dict = dict(observation_feeds)
    feed_dict[self._placeholders['labels']] = labels
    return feed_dict
def _get_classifier_feed_dict(self):
    """Build a classifier batch with two-column one-hot labels.

    Negatives are the observations of a batch drawn from ``self.sampler``;
    positives are rows drawn (with replacement) from
    ``self._goal_examples``. Negatives occupy the first
    ``self._classifier_batch_size`` rows and are labelled ``[1, 0]``; goal
    examples follow and are labelled ``[0, 1]``.

    Returns:
        dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
        the observation batch and the int32 label batch.

    Raises:
        NotImplementedError: if ``self._image_only`` is set and the
            observation width is not 3090.
    """
    n = self._classifier_batch_size

    sampled = self.sampler.random_batch(n)['observations']
    goal_rows = self._goal_examples[
        np.random.randint(self._goal_examples.shape[0], size=n)]

    labels = np.zeros((2 * n, 2), dtype=np.int32)
    labels[:n, 0] = 1  # negatives -> [1 0]
    labels[n:, 1] = 1  # goal examples -> [0 1]

    batch = np.concatenate([sampled, goal_rows], axis=0)

    if self._image_only:
        if batch.shape[1] != 3090:
            raise NotImplementedError
        # Zero everything that follows the image part of the observation.
        # NOTE(review): 48 * 48 * 3 == 6912 is larger than the only
        # accepted width (3090), so this slice selects no columns and
        # nothing is actually zeroed. Presumably either the image size or
        # the width check is stale - confirm the intended layout.
        image_dim = 48 * 48 * 3
        batch[:, image_dim:] = 0.0

    if self._mixup_alpha > 0:
        batch, labels = mixup(batch, labels, alpha=self._mixup_alpha)

    return {
        self._observations_ph: batch,
        self._label_ph: labels,
    }
def _get_classifier_feed_dict(self):
    """Build a classifier batch whose positives are derived from negatives.

    A batch of observations is drawn from ``self.sampler`` and used as the
    negatives. Each positive row is made by taking the second half of the
    columns of a negative row and repeating it twice. Negatives are
    labelled ``[1, 0]`` and positives ``[0, 1]``.

    NOTE(review): per the assertion message, rows presumably hold a state
    concatenated with a goal, so a positive reads as "state equals goal" -
    confirm against the environment's observation layout.

    Returns:
        dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
        the observation batch and the int32 label batch.
    """
    n = self._classifier_batch_size
    sampled = self.sampler.random_batch(n)['observations']

    total_width = sampled.shape[1]
    assert total_width % 2 == 0, (
        "States and goals should be concatenated together,"
        " so the total space has to be even.")
    half = total_width // 2

    second_half = sampled[:, half:]
    synthesized = np.concatenate((second_half, second_half), axis=1)

    labels = np.zeros((2 * n, 2), dtype=np.int32)
    labels[:n, 0] = 1
    labels[n:, 1] = 1

    batch = np.concatenate([sampled, synthesized], axis=0)

    if self._mixup_alpha > 0:
        batch, labels = mixup(batch, labels, alpha=self._mixup_alpha)

    return {
        self._observations_ph: batch,
        self._label_ph: labels,
    }
def _get_classifier_feed_dict(self):
    """Build a classifier batch of observations, actions and labels.

    Negatives (observations and actions) come from a batch drawn from
    ``self.sampler``; positives are rows drawn (with replacement) from
    ``self._goal_examples``, using the same row indices for its
    observations and its actions. Negatives are labelled ``[1, 0]`` and
    positives ``[0, 1]``. When mixup is enabled, the actions are reordered
    with the permutation that ``mixup`` returns.

    Returns:
        dict: observation placeholders (one per key in
        ``self._policy.observation_keys``), the actions placeholder and
        the labels placeholder mapped to their batch values.
    """
    n = self._classifier_batch_size
    obs_keys = self._policy.observation_keys

    sampled = self.sampler.random_batch(n)
    sampled_obs = sampled['observations']
    sampled_act = sampled['actions']

    goal_obs = self._goal_examples['observations']
    pool_size = goal_obs[next(iter(goal_obs))].shape[0]
    goal_rows = np.random.randint(pool_size, size=n)
    positive_obs = {key: values[goal_rows] for key, values in goal_obs.items()}
    positive_act = self._goal_examples['actions'][goal_rows]

    labels = np.zeros((2 * n, 2), dtype=np.int32)
    labels[:n, 0] = 1
    labels[n:, 1] = 1

    obs_batch = {}
    for key in obs_keys:
        obs_batch[key] = np.concatenate(
            (sampled_obs[key], positive_obs[key]), axis=0)
    act_batch = np.concatenate([sampled_act, positive_act], axis=0)

    if self._mixup_alpha > 0:
        obs_batch, labels, permutation = mixup(
            obs_batch,
            labels,
            alpha=self._mixup_alpha,
            return_permutation=True)
        act_batch = act_batch[permutation]

    feed_dict = {
        self._placeholders['observations'][key]: obs_batch[key]
        for key in obs_keys
    }
    feed_dict[self._placeholders['actions']] = act_batch
    feed_dict[self._placeholders['labels']] = labels
    return feed_dict
def _get_classifier_feed_dict(self):
    """Build a classifier batch whose positives are derived from negatives.

    A batch of observations is drawn from ``self.sampler`` and used as the
    negatives (label 0.0). For every key in
    ``self._classifier.observation_keys`` a positive (label 1.0) is made by
    taking channels 3 onward along the last axis of the 4-D negative and
    repeating them twice along that axis.

    NOTE(review): presumably each observation stacks a 3-channel state
    image with a goal image along the last axis, so a positive reads as
    "state equals goal" - confirm against the environment.

    Returns:
        dict: observation placeholders (one per classifier observation
        key) and the labels placeholder mapped to their batch values.
    """
    batch_size = self._classifier_batch_size
    negatives = self.sampler.random_batch(batch_size)['observations']

    positives = {}
    for key in self._classifier.observation_keys:
        goal_channels = negatives[key][:, :, :, 3:]
        positives[key] = np.concatenate(
            [goal_channels, goal_channels], axis=3)

    labels_batch = np.zeros((2 * batch_size, 1))
    labels_batch[batch_size:] = 1.0

    observation_batch = {
        key: np.concatenate((negatives[key], positives[key]), axis=0)
        for key in self._classifier.observation_keys
    }

    if self._mixup_alpha > 0:
        observation_batch, labels_batch = mixup(
            observation_batch, labels_batch, alpha=self._mixup_alpha)

    feed_dict = {
        **{
            self._placeholders['observations'][key]: observation_batch[key]
            for key in self._classifier.observation_keys
        },
        self._placeholders['labels']: labels_batch,
    }

    return feed_dict
def _get_classifier_feed_dict(self):
    """Build a classifier batch with two-column one-hot labels.

    Negatives are the observations of a batch drawn from ``self.sampler``
    and are labelled ``[1, 0]``; positives are rows drawn (with
    replacement) from ``self._goal_examples`` and are labelled ``[0, 1]``.
    Negatives occupy the first ``self._classifier_batch_size`` rows.

    Returns:
        dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
        the observation batch and the int32 label batch.
    """
    n = self._classifier_batch_size

    sampled = self.sampler.random_batch(n)['observations']
    goal_rows = self._goal_examples[
        np.random.randint(self._goal_examples.shape[0], size=n)]

    labels = np.zeros((2 * n, 2), dtype=np.int32)
    labels[:n, 0] = 1
    labels[n:, 1] = 1

    batch = np.concatenate([sampled, goal_rows], axis=0)

    if self._mixup_alpha > 0:
        batch, labels = mixup(batch, labels, alpha=self._mixup_alpha)

    return {
        self._observations_ph: batch,
        self._label_ph: labels,
    }
def _get_classifier_feed_dict(self):
    """Assemble a classifier training batch from fixed example pools.

    Draws ``self._classifier_batch_size`` rows (with replacement) from both
    ``self._goal_examples`` and ``self._negative_examples``. Negatives come
    first in the batch and are labelled 0.0; positives follow and are
    labelled 1.0.

    Returns:
        dict: ``self._observations_ph`` and ``self._label_ph`` mapped to
        the observation batch and the label batch.
    """
    n = self._classifier_batch_size

    # Positive indices are drawn before negative indices.
    pos_rows = np.random.randint(self._goal_examples.shape[0], size=n)
    neg_rows = np.random.randint(self._negative_examples.shape[0], size=n)
    goal_rows = self._goal_examples[pos_rows]
    negative_rows = self._negative_examples[neg_rows]

    labels = np.zeros((2 * n, 1))
    labels[n:] = 1.0

    batch = np.concatenate([negative_rows, goal_rows], axis=0)

    if self._mixup_alpha > 0:
        batch, labels = mixup(batch, labels, alpha=self._mixup_alpha)

    return {
        self._observations_ph: batch,
        self._label_ph: labels,
    }
def _get_classifier_feed_dicts(self):
    """Build one classifier feed dict per goal (goal 0 and goal 1).

    Twice ``self._classifier_batch_size`` observations are drawn from
    ``self.sampler`` and split by their ``'goal_index'`` entry, so the
    number of negatives per goal varies from call to call. Each goal gets
    ``self._classifier_batch_size`` positives drawn (with replacement)
    from its own pool (``self._goal_examples_0`` / ``self._goal_examples_1``).
    In each batch the negatives come first (label 0.0), followed by the
    positives (label 1.0).

    Returns:
        tuple: ``(feed_dict_0, feed_dict_1)``. Both dicts are keyed by the
        same placeholders from ``self._placeholders``.
    """
    batch_size = self._classifier_batch_size

    # Get 2x the number of negatives, filter out by goal index.
    negatives = self.sampler.random_batch(2 * batch_size)['observations']
    negative_ind_0 = (negatives['goal_index'] == 0).flatten()
    negative_ind_1 = (negatives['goal_index'] == 1).flatten()
    n_negatives_0 = np.sum(negative_ind_0.astype(int))
    n_negatives_1 = np.sum(negative_ind_1.astype(int))
    negatives_0 = {
        key: values[negative_ind_0] for key, values in negatives.items()
    }
    negatives_1 = {
        key: values[negative_ind_1] for key, values in negatives.items()
    }

    # Get positives from the different goal pools.
    rand_positive_ind_0 = np.random.randint(
        self._goal_examples_0[next(iter(self._goal_examples_0))].shape[0],
        size=batch_size)
    positives_0 = {
        key: values[rand_positive_ind_0]
        for key, values in self._goal_examples_0.items()
    }
    rand_positive_ind_1 = np.random.randint(
        self._goal_examples_1[next(iter(self._goal_examples_1))].shape[0],
        size=batch_size)
    positives_1 = {
        key: values[rand_positive_ind_1]
        for key, values in self._goal_examples_1.items()
    }

    # Label sizes follow the actual per-goal negative counts.
    labels_batch_0 = np.zeros((n_negatives_0 + batch_size, 1))
    labels_batch_0[n_negatives_0:] = 1.0
    labels_batch_1 = np.zeros((n_negatives_1 + batch_size, 1))
    labels_batch_1[n_negatives_1:] = 1.0

    observations_batch_0 = {
        key: np.concatenate((negatives_0[key], positives_0[key]), axis=0)
        for key in self._classifier_0.observation_keys
    }
    observations_batch_1 = {
        key: np.concatenate((negatives_1[key], positives_1[key]), axis=0)
        for key in self._classifier_1.observation_keys
    }

    if self._mixup_alpha > 0:
        # The results must be bound back to `observations_batch_*`: the
        # feed dicts below read those names, and binding to any other name
        # would feed mixed labels together with unmixed observations.
        observations_batch_0, labels_batch_0 = mixup(
            observations_batch_0, labels_batch_0, alpha=self._mixup_alpha)
        observations_batch_1, labels_batch_1 = mixup(
            observations_batch_1, labels_batch_1, alpha=self._mixup_alpha)

    feed_dict_0 = {
        **{
            self._placeholders['observations'][key]:
            observations_batch_0[key]
            for key in self._classifier_0.observation_keys
        },
        self._placeholders['labels']: labels_batch_0,
    }
    feed_dict_1 = {
        **{
            self._placeholders['observations'][key]:
            observations_batch_1[key]
            for key in self._classifier_1.observation_keys
        },
        self._placeholders['labels']: labels_batch_1,
    }

    return feed_dict_0, feed_dict_1
def _get_classifier_feed_dict(self):
    """Build a classifier batch, optionally rewarding first visits.

    Negatives are the observations of a batch drawn from ``self.sampler``
    (label ``[1, 0]``); positives are rows drawn (with replacement) from
    ``self._goal_examples`` (label ``[0, 1]``).

    When ``self._positive_on_first_occurence`` is set, every sampled
    observation whose discretized cell is not yet marked in
    ``self._seen_states`` is additionally appended to the positives, and
    its cell is marked as seen (a side effect on ``self._seen_states``).
    Those observations stay in the negatives as well.

    Returns:
        dict: observation placeholders (one per key in
        ``self._policy.observation_keys``) and the labels placeholder
        mapped to their batch values.
    """
    n = self._classifier_batch_size
    obs_keys = self._policy.observation_keys
    first_visit = self._positive_on_first_occurence

    sampled = self.sampler.random_batch(n)['observations']

    # Row indices of sampled observations that land in an unseen cell.
    # Stays empty when first-visit positives are disabled.
    first_occ_idxs = []
    if first_visit:
        env = self._training_environment.unwrapped
        n_sampled = len(sampled[next(iter(sampled))])
        for row in range(n_sampled):
            single_obs = {key: val[row] for key, val in sampled.items()}
            x_d, y_d = env._discretize_observation(single_obs)
            if self._seen_states[x_d][y_d]:
                continue
            first_occ_idxs.append(row)
            self._seen_states[x_d][y_d] = True

    pool_size = self._goal_examples[next(iter(self._goal_examples))].shape[0]
    goal_rows = np.random.randint(pool_size, size=n)
    positives = {
        key: values[goal_rows]
        for key, values in self._goal_examples.items()
    }

    if first_visit:
        positives = {
            key: np.concatenate([val, sampled[key][first_occ_idxs]], axis=0)
            for key, val in positives.items()
        }

    # n negatives, then n goal examples plus any first-visit extras.
    labels = np.zeros((n + (n + len(first_occ_idxs)), 2), dtype=np.int32)
    labels[:n, 0] = 1
    labels[n:, 1] = 1

    obs_batch = {}
    for key in obs_keys:
        obs_batch[key] = np.concatenate(
            (sampled[key], positives[key]), axis=0)

    if self._mixup_alpha > 0:
        obs_batch, labels = mixup(
            obs_batch, labels, alpha=self._mixup_alpha)

    feed_dict = {
        self._placeholders['observations'][key]: obs_batch[key]
        for key in obs_keys
    }
    feed_dict[self._placeholders['labels']] = labels
    return feed_dict
def _get_classifier_feed_dicts(self):
    """Build one classifier feed dict per goal.

    ``self._num_goals * self._classifier_batch_size`` observations are
    drawn from ``self.sampler`` and split by their ``'goal_index'`` entry,
    so the number of negatives per goal varies from call to call. Each
    goal gets ``self._classifier_batch_size`` positives drawn (with
    replacement) from its own pool in ``self._goal_example_pools``. In
    each batch the negatives come first (label 0.0), followed by the
    positives (label 1.0).

    Returns:
        list: one feed dict per goal, in goal-index order. All dicts are
        keyed by the same placeholders from ``self._placeholders`` and use
        the observation keys of ``self._classifiers[0]``.
    """
    n_goals = self._num_goals
    n_positives = self._classifier_batch_size
    obs_keys = self._classifiers[0].observation_keys

    # Sample N x the normal amount of observations, N being the number of
    # goals, then split the samples by goal index.
    # TODO: Make it split based on the goal qpos
    sampled = self.sampler.random_batch(
        n_goals * n_positives)['observations']
    goal_masks = []
    for goal in range(n_goals):
        goal_masks.append((sampled['goal_index'] == goal).flatten())
    negatives_per_goal = [
        {key: values[mask] for key, values in sampled.items()}
        for mask in goal_masks
    ]

    # Draw the positive row indices for every goal pool up front, in pool
    # order.
    pool_sizes = [
        pool[next(iter(pool.keys()))].shape[0]
        for pool in self._goal_example_pools
    ]
    positive_rows = [
        np.random.randint(pool_size, size=n_positives)
        for pool_size in pool_sizes
    ]
    positives_per_goal = [
        {key: values[rows] for key, values in pool.items()}
        for rows, pool in zip(positive_rows, self._goal_example_pools)
    ]

    # Label sizes follow the actual per-goal negative counts.
    labels_batches = []
    for mask in goal_masks:
        n_negatives = np.sum(mask.astype(int))
        labels = np.zeros((n_negatives + n_positives, 1))
        labels[n_negatives:] = 1.0
        labels_batches.append(labels)

    observation_batches = []
    for goal_negatives, goal_positives in zip(
            negatives_per_goal, positives_per_goal):
        observation_batches.append({
            key: np.concatenate(
                (goal_negatives[key], goal_positives[key]), axis=0)
            for key in obs_keys
        })

    if self._mixup_alpha > 0:
        for goal in range(n_goals):
            mixed_obs, mixed_labels = mixup(
                observation_batches[goal],
                labels_batches[goal],
                alpha=self._mixup_alpha)
            observation_batches[goal] = mixed_obs
            labels_batches[goal] = mixed_labels

    feed_dicts = []
    for obs_batch, labels in zip(observation_batches, labels_batches):
        feed = {
            self._placeholders['observations'][key]: obs_batch[key]
            for key in obs_keys
        }
        feed[self._placeholders['labels']] = labels
        feed_dicts.append(feed)

    return feed_dicts