def score_images(sample_o, o_pred, c, c_model, n_samples_per_c, n_contexts):
    """Score sampled images with ``c_model`` and order them from best to worst.

    ``o_pred`` and ``c`` are tiled ``n_samples_per_c`` times along the first
    axis and passed to ``c_model`` together with ``sample_o``. The resulting
    scores are stamped onto the (numpy copies of the) samples through
    ``write_number_on_images``. The samples are then viewed as a grid with
    ``n_contexts`` columns, and every column is reordered independently so
    that its highest score comes first.

    :param sample_o: tensor of sampled images handed to ``c_model``.
    :param o_pred: tensor of predicted images; also prepended to the output.
    :param c: context tensor, tiled like ``o_pred``.
    :param c_model: callable invoked as ``c_model(o_pred, sample_o, c)``.
    :param n_samples_per_c: number of repeats, i.e. rows of the score grid.
    :param n_contexts: number of columns of the score grid.
    :return: numpy array holding ``o_pred`` followed by the sorted samples,
        concatenated along axis 0.
    """
    tiled_pred = o_pred.repeat(n_samples_per_c, 1, 1, 1)
    tiled_context = c.repeat(n_samples_per_c, 1, 1, 1)
    scores = c_model(tiled_pred, sample_o, tiled_context)

    samples = sample_o.detach().cpu().numpy()
    scores = scores.detach().cpu().numpy()
    write_number_on_images(samples, scores)

    # View the flat batch as (row, context, ...) so each context column can be
    # ranked on its own.
    image_shape = samples.shape[1:]
    grid = samples.reshape((-1, n_contexts) + image_shape)

    # argsort is ascending; flipping the row axis gives max-to-min per column.
    descending_rows = scores.reshape(-1, n_contexts).argsort(0)[::-1]
    row_index = descending_rows.reshape(-1)
    col_index = np.tile(np.arange(n_contexts), n_samples_per_c)

    ranked = grid[row_index, col_index].reshape((-1,) + image_shape)
    return np.concatenate([o_pred.detach().cpu().numpy(), ranked], axis=0)
def run_planning_and_inverse_model(env, envname, hp, n_test_locs, test_data, config, test_mode, model, c_model,
                                   actor, savepath):
    """Run graph-based planning plus the inverse-model actor on a set of test tasks.

    For each of the ``n_test_locs`` entries of ``test_data`` the environment is
    reset to the start state, a memory graph over sampled observations is built
    (only for the first task, or for every task when ``test_mode == "eval"``),
    the goal is localized in that graph, and the actor is rolled out for up to
    ``hp["T"]`` steps towards way points taken from the shortest path.
    Per-step frames and periodic plan visualizations are written under
    ``savepath``.

    :param env: environment exposing ``reset``, ``render``, ``step_only``,
        ``get_current_img`` and ``get_current_obs``.
    :param envname: environment name forwarded to ``generate_samples``.
    :param hp: dict of hyper-parameters; the keys read here are
        ``replanning_every``, ``use_edge_weight``, ``use_vae_samples``,
        ``threshold_edge``, ``threshold_shortcut``, ``n_planning_samples``,
        ``score_type``, ``T``, ``action_noise`` and ``block_size``.
    :param n_test_locs: number of tasks to run.
    :param test_data: indexable; each item unpacks to
        ``(curr_obs, goal_obs, start_state, goal_state, context)``.
    :param config: rendering config passed through to the environment.
    :param test_mode: when equal to ``"eval"`` the graph is rebuilt per task.
    :param model: generative model used for sampling and reconstruction.
    :param c_model: scoring model used for graph building and localization.
    :param actor: callable ``actor(o_curr, o_goal, context)`` returning an action tensor.
    :param savepath: directory receiving the images.
    :return: ``(total_dist, total_success, n_test_locs)``.
    """
    # TODO: auto collect this & make sure all c_type's work
    replan_period = hp["replanning_every"]
    use_edge_weight = hp["use_edge_weight"]
    sample_from_vae = hp["use_vae_samples"]
    edge_threshold = hp["threshold_edge"]
    waypoint_threshold = hp["threshold_shortcut"]
    n_planning_samples = hp["n_planning_samples"]
    score_type = hp["score_type"]

    # The graph is reused across tasks unless test_mode == "eval".
    o_samples_npy = graph = node_goal = None
    total_dist = total_success = 0

    for task_idx in range(n_test_locs):
        curr_obs, goal_obs, start_state, goal_state, context = test_data[task_idx]
        env.reset(start_state)
        env.render(config=config)

        # Generate data and build the graph.
        if task_idx == 0 or test_mode == "eval":
            # TODO: do batching to get a larger graph
            if sample_from_vae:
                sampled = model.inference(context, n_samples=n_planning_samples, layer_cond=False)
            else:
                sampled = generate_samples(envname, env, config, n_planning_samples)
            o_samples_npy = sampled.cpu().detach().numpy()
            graph = build_memory_graph(o_samples_npy, context, c_model, score_type, edge_threshold,
                                       use_edge_weight)
            save_image(from_numpy_to_var(o_samples_npy),
                       os.path.join(savepath, 'plan_samples.png'),
                       nrow=10)

        # Localize the goal in the graph and reconstruct the goal observation.
        node_goal = localization(goal_obs, o_samples_npy, context, c_model)
        o_goal_pred, _, _, _ = model(goal_obs, context, determ=True)

        # Actor rollout.
        for t in range(hp["T"]):
            o_curr_pred, _, _, _ = model(curr_obs, context, determ=True)

            # Re-localize and re-plan every `replan_period` steps.
            if t % replan_period == 0:
                node_cur = localization(o_curr_pred, o_samples_npy, context, c_model)
                shortest_path, edge_weights, edge_raw = find_shortest_path(graph, node_cur, node_goal)

            # Pick the next sub-goal image; one path node is skipped for every
            # ten steps elapsed since the last re-plan.
            steps_since_replan = t % replan_period
            first_candidate = min(steps_since_replan // 10, len(shortest_path) - 1)
            next_way_point = find_next_way_point(
                o_curr_pred,
                o_samples_npy[shortest_path][first_candidate:],
                o_goal_pred,
                context,
                waypoint_threshold,
                c_model)
            next_o_goal = next_way_point[None, :]

            # Visualize the plan every 50 steps.
            if t % 50 == 0:
                img_seq = torch.cat([curr_obs, from_numpy_to_var(o_samples_npy[shortest_path]), goal_obs])
                all_score = get_score(
                    c_model,
                    o_curr_pred.repeat(len(img_seq), 1, 1, 1),
                    img_seq,
                    context.repeat(len(img_seq), 1, 1, 1),
                    type="all"
                )
                img_seq_np = img_seq.detach().cpu().numpy()
                score_labels = ["%d, %.3f" % (raw, scored)
                                for raw, scored in zip(all_score["raw"], all_score[score_type])]
                write_number_on_images(img_seq_np, score_labels, position="top-left")
                edge_labels = ["", ""]
                edge_labels = edge_labels + ["%d, %.3f" % (raw, weight)
                                             for raw, weight in zip(edge_raw, edge_weights)]
                edge_labels = edge_labels + [""]
                write_number_on_images(img_seq_np, edge_labels, position="bottom-left")
                save_image(torch.Tensor(img_seq_np),
                           os.path.join(savepath, 'plan_task_%d_step_%d.png' % (task_idx, t)),
                           nrow=len(shortest_path) + 2)

            # Query the actor and step the environment with noisy actions.
            action = actor(o_curr_pred, next_o_goal, context).cpu().numpy()
            _, _ = env.step_only(action + np.random.randn(2) * hp["action_noise"])
            next_img = env.get_current_img(config)
            curr_obs = get_torch_images_from_numpy(next_img[None, :], False, one_image=True)

            # Dump the current frame into a per-task folder.
            import matplotlib.pyplot as plt
            task_dir = os.path.join(savepath, '%d' % task_idx)
            if not os.path.exists(task_dir):
                os.makedirs(task_dir)
            plt.imshow(next_img)
            plt.savefig(os.path.join(savepath, '%d/%03d.png' % (task_idx, t)))
            plt.close()

            curr_dist = np.linalg.norm(env.get_current_obs() - goal_state)
            is_success = (np.abs(env.get_current_obs() - goal_state) < hp["block_size"]).all()
            if is_success:
                break

        print("Final Distance for Task %d: %f; Success %s" % (task_idx, curr_dist, is_success))
        total_dist += curr_dist
        total_success += is_success

    print("Summary: total dist %f; success %d out of %d" % (total_dist, total_success, n_test_locs))
    return total_dist, total_success, n_test_locs
def plan(self, data_start_loader, data_goal_loader, epoch, metric, keep_best=10):
    """
    Generate visual plans from starts to goals.
    First, find the closest codes for starts and goals.
    Then, generate the plans in the latent space.
    Finally, map the latent plans to visual plans and use the classifier to pick the top K.
    The start image is loaded from data_start_loader.
    The goal image is loaded from data_goal_loader.

    The two loaders are iterated in lockstep; one plan image is written to
    ``<out_dir>/plans`` per (start, goal) pair. The closest codes are cached
    in a ``.pt`` file next to the image and reloaded when that file exists.

    :param data_start_loader: iterable of batches; element ``[0]`` of each batch is the start image tensor.
    :param data_goal_loader: iterable of batches; element ``[0]`` of each batch is the goal image tensor.
    :param epoch: epoch number, used only in output file names.
    :param metric: metric name forwarded to ``self.closest_code`` and used in file names.
    :param keep_best: number of rollouts kept by ``self.get_best_k``.
    :return: None; results are written to disk.
    """
    for i, (start_batch, goal_batch) in enumerate(zip(data_start_loader, data_goal_loader)):
        if self.fcn:
            start_obs = self.apply_fcn_mse(start_batch[0])
            goal_obs = self.apply_fcn_mse(goal_batch[0])
        else:
            # Without the FCN pre-processing use the raw images, exactly as
            # plan_hack does; otherwise start_obs/goal_obs would be unbound.
            start_obs = Variable(start_batch[0]).cuda()
            goal_obs = Variable(goal_batch[0]).cuda()

        # Compute c_start and c_goal (or reuse the cached ones).
        pt_path = os.path.join(
            self.out_dir, 'plans',
            'c_min_%s_%d_epoch_%d.pt' % (metric, i, epoch))
        if os.path.exists(pt_path):
            c_start, c_goal, est_start_obs, est_goal_obs = torch.load(pt_path)
        else:
            # NOTE(review): the meaning of the literal 400 and 1 arguments is
            # defined by closest_code, which is not visible here.
            _, c_start, _, est_start_obs = self.closest_code(
                start_obs, 400, False, metric, 1)
            _, _, c_goal, est_goal_obs = self.closest_code(
                goal_obs, 400, True, metric, 1)
            torch.save([c_start, c_goal, est_start_obs, est_goal_obs], pt_path)

        # Plan using c_start and c_goal.
        n_copies = self.traj_eval_copies
        rollout = self.planner(c_start.repeat(n_copies, 1),
                               c_goal.repeat(n_copies, 1),
                               start_obs=start_obs,
                               goal_obs=goal_obs)

        # Insert closest start and goal.
        rollout.insert(0, est_start_obs.repeat(n_copies, 1, 1, 1))
        rollout.append(est_goal_obs.repeat(n_copies, 1, 1, 1))

        # Insert real start and goal.
        rollout.insert(0, start_obs.repeat(n_copies, 1, 1, 1))
        rollout.append(goal_obs.repeat(n_copies, 1, 1, 1))

        rollout_best_k, confidences = self.get_best_k(rollout, keep_best)
        rollout_data = torch.stack(rollout_best_k, dim=0)

        # Masks start at -1 everywhere; the confidences are drawn onto them and
        # merged into the rollout images with an element-wise max.
        masks = -np.ones(
            (rollout_data.size()[0], keep_best, self.channel_dim, 64, 64),
            dtype=np.float32)
        write_number_on_images(masks, confidences)

        # Swap the first two axes so that each kept plan occupies one image row.
        plan_grid = torch.max(rollout_data, from_numpy_to_var(masks)).permute(
            1, 0, 2, 3, 4).contiguous().view(-1, self.channel_dim, 64, 64)

        save_image(plan_grid.data,
                   os.path.join(
                       self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png' %
                       (self.planner.__name__, metric, i, epoch)),
                   nrow=plan_grid.size()[0] // keep_best,
                   normalize=True)
def plan_hack(self, data_start_loader, data_goal_loader, epoch, metric, keep_best=10):
    """
    Generate visual plans from starts to goals.
    First, find the closest codes for starts and goals.
    Then, generate the plans in the latent space.
    Finally, map the latent plans to visual plans and use the classifier to pick the top K.
    The start image is fixed.
    The goal image is loaded from data_goal_loader.

    Only the first batch of ``data_start_loader`` is used. Besides one plan
    image per goal, the method writes ``all_confidences.pkl`` and
    ``boxplot.png`` into ``self.out_dir``.

    :param data_start_loader: iterable of batches; element ``[0]`` of the first batch is the start image tensor.
    :param data_goal_loader: iterable of batches; element ``[0]`` of each batch is the goal image tensor.
    :param epoch: epoch number, used only in output file names.
    :param metric: metric name forwarded to ``self.closest_code`` and used in file names.
    :param keep_best: number of rollouts kept by ``self.get_best_k``.
    :return: None; results are written to disk.
    :raises ValueError: if ``data_start_loader`` yields no batch.
    """
    all_confidences = []
    c_start = None
    est_start_obs = None

    # Only the first start batch is used: the start image is fixed.
    for img in data_start_loader:
        if self.fcn:
            start_obs = self.apply_fcn_mse(img[0])
        else:
            start_obs = Variable(img[0]).cuda()
        pt_start = os.path.join(self.out_dir, 'plans', 'c_min_start_%s.pt' % metric)
        if os.path.exists(pt_start):
            z_start, c_start, _, est_start_obs = torch.load(pt_start)
        else:
            # NOTE(review): the meaning of the literal 400 and 1 arguments is
            # defined by closest_code, which is not visible here.
            z_start, c_start, other_code, est_start_obs = self.closest_code(
                start_obs, 400, False, metric, 1)
            torch.save([z_start, c_start, other_code, est_start_obs], pt_start)
        break

    if c_start is None:
        # Fail early with a clear message rather than deep inside the planner.
        raise ValueError("data_start_loader yielded no batches")

    # Hacky for now
    try:
        c_start = Variable(c_start)
        est_start_obs = Variable(est_start_obs)
    except RuntimeError:
        pass

    for i, img in enumerate(data_goal_loader, 0):
        if self.fcn:
            goal_obs = self.apply_fcn_mse(img[0])
        else:
            goal_obs = Variable(img[0]).cuda()
        pt_goal = os.path.join(
            self.out_dir, 'plans',
            'c_min_goal_%s_%d_epoch_%d.pt' % (metric, i, epoch))
        if os.path.exists(pt_goal):
            z_goal, _, c_goal, est_goal_obs = torch.load(pt_goal)
        else:
            z_goal, other_code, c_goal, est_goal_obs = self.closest_code(
                goal_obs, 400, True, metric, 1)
            torch.save([z_goal, other_code, c_goal, est_goal_obs], pt_goal)

        # Hacky for now
        try:
            c_goal = Variable(c_goal)
            est_goal_obs = Variable(est_goal_obs)
        except RuntimeError:
            pass

        # Plan using c_start and c_goal.
        n_copies = self.traj_eval_copies
        rollout = self.planner(c_start.repeat(n_copies, 1),
                               c_goal.repeat(n_copies, 1),
                               start_obs=start_obs,
                               goal_obs=goal_obs)

        # Insert closest start and goal.
        rollout.insert(0, est_start_obs.repeat(n_copies, 1, 1, 1))
        rollout.append(est_goal_obs.repeat(n_copies, 1, 1, 1))

        # Insert real start and goal.
        rollout.insert(0, start_obs.repeat(n_copies, 1, 1, 1))
        rollout.append(goal_obs.repeat(n_copies, 1, 1, 1))

        rollout_best_k, confidences = self.get_best_k(rollout, keep_best)
        rollout_data = torch.stack(rollout_best_k, dim=0)

        # Masks start at -1 everywhere; the confidences are drawn onto them and
        # merged into the rollout images with an element-wise max.
        masks = -np.ones(
            (rollout_data.size()[0], keep_best, self.channel_dim, 64, 64),
            dtype=np.float32)
        write_number_on_images(masks, confidences)

        # Swap the first two axes so that each kept plan occupies one image row.
        plan_grid = torch.max(rollout_data, from_numpy_to_var(masks)).permute(
            1, 0, 2, 3, 4).contiguous().view(-1, self.channel_dim, 64, 64)

        # confidences.T has size keep_best x rollout length
        all_confidences.append(confidences.T[-1][:-1])

        save_image(plan_grid.data,
                   os.path.join(
                       self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png' %
                       (self.planner.__name__, metric, i, epoch)),
                   nrow=plan_grid.size()[0] // keep_best,
                   normalize=True)

    all_confidences = np.stack(all_confidences)
    print((all_confidences[:, 0] > 0.9).sum(),
          (all_confidences[:, -1] > 0.9).sum())

    import pickle as pkl
    with open(os.path.join(self.out_dir, 'all_confidences.pkl'), 'wb') as f:
        pkl.dump(all_confidences, f)

    import matplotlib.pyplot as plt
    # Draw on a fresh figure and close it afterwards: pyplot keeps a global
    # "current figure", so without this every call would paint its boxes on
    # top of the ones left over from earlier calls (and leak the figure).
    fig = plt.figure()
    plt.boxplot([
        all_confidences.mean(1),
        all_confidences[all_confidences[:, -1] > 0.9].mean(1)
    ])
    plt.savefig(os.path.join(self.out_dir, 'boxplot.png'))
    plt.close(fig)