def _get_latent_locs(self, model, hidden_state, file_name=None):
    rand_actions = ptu.from_numpy(
        np.stack([self.env.sample_action() for _ in range(self.num_loc_samples)]))  # (B,A)
    state_action_attention, interaction_attention, all_delta_vals, all_lambdas_deltas = \
        model.get_all_activation_values(hidden_state, rand_actions)
    interaction_attention = interaction_attention.sum(-1)  # (B,K,K-1) -> (B,K)
    normalized_weights = interaction_attention / interaction_attention.sum(0)  # (B,K), each column sums to 1
    mean_point = (normalized_weights.unsqueeze(2) *
                  rand_actions[:, :2].unsqueeze(1))  # (B,K,1) * (B,1,2) -> (B,K,2)
    mean_point = mean_point.sum(0)  # (B,K,2) -> (K,2)

    if file_name is not None:
        plot_action_vals(self.env, ptu.get_numpy(interaction_attention),
                         ptu.get_numpy(rand_actions),
                         "{}/{}_pick_locs".format(self.logging_dir, file_name),
                         is_normalized=True)
    return mean_point
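# _get_latent_locs normalizes each latent slot's attention across the B sampled actions,
# then takes the attention-weighted average of the actions' (x, y) components. Below is a
# minimal, self-contained sketch of just that computation; the shapes and values are
# hypothetical toy data, not outputs of the model.
def _demo_weighted_mean_point():
    import torch
    B, K = 4, 3
    attention = torch.rand(B, K)            # summed interaction attention, (B,K)
    rand_xy = torch.rand(B, 2)              # first two action dims of B sampled actions
    weights = attention / attention.sum(0)  # normalize per slot: each column sums to 1
    mean_point = (weights.unsqueeze(2) * rand_xy.unsqueeze(1)).sum(0)  # (K,2)
    # Equivalent contraction, as a sanity check on the broadcasting above:
    assert torch.allclose(mean_point, torch.einsum('bk,bd->kd', weights, rand_xy))
    return mean_point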
def plot_action_errors(self, env, actions, pred_recons, file_name):
    errors = env.get_action_error(ptu.get_numpy(actions))  # (B), numpy
    full_plot = pred_recons.view([5, -1] + list(pred_recons.shape[1:]))  # (5,B//5,3,D,D)
    caption = np.reshape(errors, (5, -1))  # (5,B//5), numpy
    plot_multi_image(ptu.get_numpy(full_plot),
                     '{}/{}.png'.format(self.logging_dir, file_name),
                     caption=caption)
def sum_aggregate(self, goal_latents, goal_latents_recon, goal_image, pred_latents,
                  pred_latents_recon, pred_images):
    n_goal_latents = goal_latents_recon.shape[0]  # Note: equals K if nothing was filtered out

    # Compare each goal latent against all predicted latents
    costs = []        # (n_goal_latents, n_actions)
    latent_idxs = []  # (n_goal_latents, n_actions); [a,b] indexes the best-matching pred latent
    for i in range(n_goal_latents):
        single_costs = self.get_single_costs(goal_latents[i], goal_latents_recon[i],
                                             pred_latents, pred_latents_recon)
        min_costs, latent_idx = single_costs.min(-1)  # min over the K pred latents -> (n_actions)
        costs.append(min_costs)
        latent_idxs.append(latent_idx)
    costs = torch.stack(costs)              # (n_goal_latents, n_actions)
    latent_idxs = torch.stack(latent_idxs)  # (n_goal_latents, n_actions)

    # Rank actions by the sum of per-goal-latent costs
    sorted_costs, best_action_idxs = costs.sum(0).sort()

    if self.plot_actions:
        # The plot contains: pred_images, the goal latent reconstructions, and the
        # corresponding pred latent reconstructions (matched via latent_idxs)
        sorted_pred_images = pred_images[best_action_idxs]
        corresponding_pred_latent_recons = []
        for i in range(n_goal_latents):
            tmp = pred_latents_recon[best_action_idxs, latent_idxs[i, best_action_idxs]]  # (n_actions,3,64,64)
            corresponding_pred_latent_recons.append(tmp)
        corresponding_pred_latent_recons = torch.stack(
            corresponding_pred_latent_recons)  # (n_goal_latents,n_actions,3,64,64)
        corresponding_costs = costs[:, best_action_idxs]

        full_plot = torch.cat([
            sorted_pred_images.unsqueeze(0),   # (1,n_actions,3,64,64)
            corresponding_pred_latent_recons,  # (n_goal_latents,n_actions,3,64,64)
        ], 0)
        plot_size = self.plot_actions
        full_plot = full_plot[:, :plot_size]

        # Prepend the goal image and goal latent reconstructions as the first column
        tmp = torch.cat([goal_image, goal_latents_recon], dim=0).unsqueeze(1)  # (n_goal_latents+1,1,3,64,64)
        full_plot = torch.cat([tmp, full_plot], dim=1)

        # Captions: total cost per action on the first row, per-goal-latent costs below
        caption = np.zeros(full_plot.shape[:2])
        caption[0, 1:] = ptu.get_numpy(sorted_costs[:plot_size])
        caption[1:1 + n_goal_latents, 1:] = ptu.get_numpy(corresponding_costs)[:, :plot_size]

        plot_multi_image(ptu.get_numpy(full_plot),
                         '{}/{}.png'.format(self.logging_dir, self.image_suffix),
                         caption=caption)

    return ptu.get_numpy(sorted_costs), ptu.get_numpy(best_action_idxs), np.zeros(len(sorted_costs))
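# sum_aggregate ranks actions by matching each goal latent to its cheapest predicted
# latent (min over K) and summing those minima across goal latents. A minimal sketch of
# just that aggregation, on hypothetical toy cost tensors:
def _demo_sum_aggregate_ranking():
    import torch
    n_goal_latents, n_actions, K = 3, 5, 4
    pairwise = torch.rand(n_goal_latents, n_actions, K)       # cost of each (goal latent, action, pred latent)
    min_costs, latent_idxs = pairwise.min(-1)                 # (n_goal_latents, n_actions)
    sorted_costs, best_action_idxs = min_costs.sum(0).sort()  # actions ranked by total matching cost
    return sorted_costs, best_action_idxs, latent_idxs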
def run_plan(self, goal_image, env, action_selection_class, num_actions_to_take,
             true_data, filter_goal_image=None):
    # Goal inference
    self.env = env
    goal_info = self.goal_inference(goal_image, filter_goal_image)

    # State acquisition is not required for stage 1:
    # cur_state_and_other_info = self.state_acquisition(initial_obs, initial_actions)

    # Planning
    actions, pred_recons, obs, try_obs = [], [], [], []  # (T,A), (T,3,D,D), (T,D,D,3) np, (T,D,D,3) np
    for t in range(num_actions_to_take):
        next_action, goal_latent_index, pred_recon = action_selection_class.select_action(
            goal_info, env, self.model, "{}".format(t))  # (A), (Sc), (3,D,D)
        actions.append(next_action)                  # (A)
        pred_recons.append(pred_recon)               # (3,D,D)
        try_obs.append(env.try_action(next_action))  # (D,D,3), numpy
        obs.append(env.step(next_action))            # (D,D,3), numpy
        # self._remove_goal_latent(goal_info, goal_latent_index)

    ######## Create final mpc image ########
    obs = process_env_obs(np.array(obs))          # (T,3,D,D)
    try_obs = process_env_obs(np.array(try_obs))  # (T,3,D,D)
    pred_recons = torch.stack(pred_recons)
    save_image(torch.cat([obs, pred_recons, try_obs], dim=0),
               "{}/mpc.png".format(self.logging_dir), nrow=obs.shape[0])

    ######## Compute result stats ########
    final_obs = process_env_obs(env.get_observation())  # (1,3,D,D)
    torch_goal_image = process_env_obs(goal_image)      # (1,3,D,D)
    mse = ptu.get_numpy(
        torch.pow(final_obs - torch_goal_image, 2).mean())  # Compare final obs to goal obs (Sc), numpy
    (correct, max_pos, max_rgb), state = env.compute_accuracy(true_data)
    stats = {
        'mse': mse,
        'correct': int(correct),
        'max_pos': max_pos,
        'max_rgb': max_rgb,
        'actions': actions,
    }
    return stats
def get_mse_from_dataset(variant):
    from op3.core import logger
    copy_to_save_file(logger.get_snapshot_dir())
    train_path = get_module_path() + '/ec2_data/{}.h5'.format(variant['dataset'])
    num_samples = 100
    train_dataset, _ = load_dataset(train_path, train=False, batchsize=1,
                                    size=num_samples, static=False)

    models_and_type = []
    for a_model in variant['models']:
        m = load_model(a_model["saved_model_args"], train_dataset.action_dim, a_model["K"])
        m_type = a_model['model_type']
        models_and_type.append((m, m_type))

    batch_indices = np.arange(0, num_samples, 4)  # bs=4
    all_mse = []
    for i in range(len(batch_indices) - 1):
        start_idx, end_idx = batch_indices[i], batch_indices[i + 1]
        frames, actions = train_dataset[start_idx:end_idx]  # (bs,T,3,D,D)
        mse = get_mse(models_and_type, frames, actions, variant['T'])  # (M,bs,T), torch tensors
        all_mse.append(mse.permute(1, 0, 2))  # (bs,M,T)
    all_mse = torch.stack(all_mse, dim=0)  # (I/bs,bs,M,T)
    all_mse = ptu.get_numpy(
        all_mse.view(-1, len(models_and_type), variant['T']))  # (I,M,T), numpy array now
    np.save(logger.get_snapshot_dir() + '/computed_mse.npy', all_mse)

    mean_vals = np.mean(all_mse, axis=0)  # (M,T)
    std_vals = np.std(all_mse, axis=0)    # (M,T)
    for i in range(len(models_and_type)):
        # Next-step models produce no prediction at t=0, so skip the first timestep
        if models_and_type[i][1] in ('next_step', 'rprp_pred'):
            plt.errorbar(range(1, variant['T']), mean_vals[i][1:], std_vals[i][1:],
                         label='{}'.format(models_and_type[i][1]), capsize=5)
        else:
            plt.errorbar(range(0, variant['T']), mean_vals[i], std_vals[i],
                         label='{}'.format(models_and_type[i][1]), capsize=5)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.savefig(logger.get_snapshot_dir() + '/relative_mse.png', bbox_inches="tight")
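# A hedged usage sketch for get_mse_from_dataset. The dataset name, model args, and K
# values below are placeholders for illustration, not files shipped with the repo:
#
# variant = {
#     'dataset': 'my_dataset',  # expects <module>/ec2_data/my_dataset.h5
#     'T': 10,                  # number of rollout timesteps to evaluate
#     'models': [
#         {'saved_model_args': dict(...), 'K': 4, 'model_type': 'next_step'},
#         {'saved_model_args': dict(...), 'K': 4, 'model_type': 'static'},
#     ],
# }
# get_mse_from_dataset(variant)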
def _random_shooting(self, actions, model, image_suffix):
    # Like internal_inference, except initial_hidden_state may contain only one state
    # while obs/actions contain (B,*)
    # Inputs: obs (B,T1,3,D,D) or None, actions (B,T2,A) or None,
    #   initial_hidden_state or None, schedule (T3)
    # Note: assumes initial_hidden_state has entries of size (B=1,*)
    goal_info = self.goal_info
    schedule = np.array([1] * actions.shape[1])
    actions = ptu.from_numpy(actions)

    if self.action_type is None:
        predicted_info = model.batch_internal_inference(
            obs=None, actions=actions, initial_hidden_state=self.initial_hidden_state,
            schedule=schedule, figure_path=None)
        all_env_actions = actions
    else:
        predicted_info, all_env_actions = self._latent_batch_internal_inference(
            model, actions, self.initial_hidden_state)

    # Inputs to get_action_rankings(): goal_latents (n_goal_latents=K,R),
    #   goal_latents_recon (n_goal_latents=K,3,64,64), goal_image (1,3,64,64),
    #   pred_latents (n_actions,K,R), pred_latents_recon (n_actions,K,3,64,64),
    #   pred_images (n_actions,3,64,64)
    sorted_costs, best_actions_indices, goal_latent_indices = self.score_actions_class.get_action_rankings(
        goal_info["state"]["post"]["samples"][0],
        goal_info["sub_images"][0],
        goal_info["goal_image"],
        predicted_info["state"]["post"]["samples"],
        predicted_info["sub_images"],
        predicted_info["final_recon"],
        image_suffix=image_suffix)

    num_plot_actions = 20
    self.plot_action_errors(
        self.env,
        all_env_actions[best_actions_indices][:num_plot_actions, 0],
        predicted_info["final_recon"][best_actions_indices][:num_plot_actions],
        image_suffix + "_action_errors")
    best_single_env_action = all_env_actions[best_actions_indices[0]]
    return best_actions_indices, goal_latent_indices, predicted_info["final_recon"], \
        ptu.get_numpy(best_single_env_action)
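# _random_shooting is an instance of the generic random-shooting pattern: sample many
# candidate actions, score each against the goal, and keep the ranking. A minimal sketch
# with hypothetical sample_action/score_fn callables:
def _demo_random_shooting(sample_action, score_fn, n_candidates=100):
    import numpy as np
    candidates = np.stack([sample_action() for _ in range(n_candidates)])  # (N,A)
    costs = np.array([score_fn(a) for a in candidates])                    # (N,)
    order = np.argsort(costs)                                              # lowest cost first
    return candidates[order], costs[order]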
def run_plan(self, goal_image, env, initial_obs, initial_actions, action_selection_class,
             num_actions_to_take, planning_horizon, true_data, filter_goal_image=None):
    # Goal inference
    self.env = env
    goal_info = self.goal_inference(goal_image, filter_goal_image)

    # State acquisition
    cur_state_and_other_info = self.state_acquisition(initial_obs, initial_actions)
    initial_recon = cur_state_and_other_info["final_recon"]

    # Planning
    actions_taken, actions_planned, obs, try_obs = [], [], [], []  # (T,A), (?,Tp,A), (T,D,D,3) np, (T,D,D,3) np
    pred_recons = [initial_recon[0]]  # becomes (T+1,3,D,D) once stacked
    best_accuracy = 0
    first_finished_plan_steps = np.nan
    for t in range(num_actions_to_take):
        next_actions, goal_latent_index, pred_recon = action_selection_class.select_action(
            goal_info, cur_state_and_other_info["state"], env, self.model,
            "{}".format(t))  # (Tp,A), (Sc), (3,D,D)

        next_obs = [env.get_observation()]  # Needed for update_state
        for i in range(planning_horizon):
            next_obs.append(env.step(next_actions[i]))  # (D,D,3), np
        try_obs.append(env.try_step(next_actions))  # (D,D,3)
        pred_recons.append(pred_recon)  # (3,D,D)
        actions_taken.extend(next_actions[:planning_horizon])  # (Tt,A)
        actions_planned.append(next_actions)  # (Tp,A)
        obs.extend(next_obs[1:])  # Don't include the starting image again
        # self._remove_goal_latent(goal_info, goal_latent_index)

        next_obs = np.array(next_obs)  # (Tp+1,D,D,3) np
        cur_state_and_other_info = self.update_state(
            next_obs, next_actions[:planning_horizon], cur_state_and_other_info["state"],
            file_name="{}/state_update_{}.png".format(self.logging_dir, t))

        accuracy = self.env.compute_accuracy(true_data, threshold=self.accuracy_threshold)
        best_accuracy = max(accuracy, best_accuracy)
        if np.isnan(first_finished_plan_steps) and accuracy == 1:
            first_finished_plan_steps = t + 1
            break

    ######## Create final mpc image ########
    goal_image_tensor = process_env_obs(goal_image)  # (1,3,D,D)
    starting_image_tensor = process_env_obs(initial_obs[-1])  # (1,3,D,D)
    obs = np.concatenate((initial_obs[-1:], obs))  # (T+1,D,D,3) np
    obs = process_env_obs(np.array(obs))  # (T+1,3,D,D)
    obs = torch.cat([obs, goal_image_tensor])  # (T+2,3,D,D)
    try_obs = process_env_obs(np.array(try_obs))  # (T,3,D,D)
    try_obs = torch.cat([starting_image_tensor, try_obs, goal_image_tensor])  # (T+2,3,D,D)
    pred_recons = torch.stack(pred_recons)  # (T+1,3,D,D)
    pred_recons = torch.cat([pred_recons, goal_info["final_recon"]])  # (T+2,3,D,D)
    save_image(torch.cat([obs, pred_recons, try_obs], dim=0),
               "{}/mpc.png".format(self.logging_dir), nrow=obs.shape[0])

    ######## Compute result stats ########
    final_obs = process_env_obs(env.get_observation())  # (1,3,D,D)
    torch_goal_image = process_env_obs(goal_image)  # (1,3,D,D)
    mse = ptu.get_numpy(
        torch.pow(final_obs - torch_goal_image, 2).mean())  # Compare final obs to goal obs (Sc), numpy
    stats = {
        'mse': mse,
        'correct': best_accuracy,
        'actions': actions_taken,
        'first_finished_plan_steps': first_finished_plan_steps,
    }
    return stats
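# run_plan above is a standard MPC loop: plan a short action sequence, execute its first
# `planning_horizon` steps, re-infer the latent state from the new observations, and stop
# early once the goal is reached. A compact sketch with hypothetical planner/env
# interfaces (select_action, update_state, compute_accuracy are stand-ins):
def _demo_mpc_loop(planner, env, state, num_actions_to_take, planning_horizon):
    for t in range(num_actions_to_take):
        plan = planner.select_action(state, env)  # (Tp,A)
        obs = [env.get_observation()]
        for a in plan[:planning_horizon]:
            obs.append(env.step(a))               # execute only the committed prefix
        state = planner.update_state(obs, plan[:planning_horizon], state)
        if env.compute_accuracy() == 1:           # early exit once the goal is matched
            return t + 1                          # number of planning steps needed
    return float('nan')                           # never reached the goal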