def save_instruction(self, instruction, filename, torch=False, folder=""): if torch: instruction = debug_untokenize_instruction(instruction) if folder != "": os.makedirs(folder, exist_ok=True) with open(os.path.join(folder, filename), "w") as fp: fp.write(instruction)
def get_viz(self): presenter = Presenter() out = {"viz_img": []} for i, img in enumerate(self.viz_images): instruction = self.instructions[i] if len(instruction.view([-1])) < 2: instruction = [0] else: instruction = list(instruction.data.cpu().numpy().squeeze()) instruction_str = debug_untokenize_instruction(instruction) viz_img = presenter.overlay_text(img, instruction_str) out["viz_img"].append(viz_img) return out
def train_top_down_pred(): P.initialize_experiment() setup = P.get_current_parameters()["Setup"] launch_ui() env = PomdpInterface() print("model_name:", setup["top_down_model"]) print("model_file:", setup["top_down_model_file"]) model, model_loaded = load_model( model_name_override=setup["top_down_model"], model_file_override=setup["top_down_model_file"]) exec_model, wrapper_model_loaded = load_model( model_name_override=setup["wrapper_model"], model_file_override=setup["wrapper_model_file"]) affine2d = Affine2D() if model.is_cuda: affine2d.cuda() eval_envs = get_correct_eval_env_id_list() print("eval_envs:", eval_envs) train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions( max_size=setup["max_envs"]) all_instr = { **train_instructions, **dev_instructions, **train_instructions } token2term, word2token = get_word_to_token_map(corpus) dataset = model.get_dataset(envs=eval_envs, dataset_name="supervised", eval=True, seg_level=False) dataloader = DataLoader(dataset, collate_fn=dataset.collate_fn, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) for b, batch in list(enumerate(dataloader)): print("batch:", batch) images = batch["images"] instructions = batch["instr"] label_masks = batch["traj_labels"] affines = batch["affines_g_to_s"] env_ids = batch["env_id"] set_idxs = batch["set_idx"] seg_idxs = batch["seg_idx"] env_id = env_ids[0][0] set_idx = set_idxs[0][0] print("env_id of this batch:", env_id) env.set_environment( env_id, instruction_set=all_instr[env_id][set_idx]["instructions"]) env.reset(0) num_segments = len(instructions[0]) print("num_segments in this batch:", num_segments) write_instruction("") write_real_instruction("None") instruction_str = read_instruction_file() print("Initial instruction: ", instruction_str) # TODO: Reset model state here if we keep any temporal memory etc for s in range(num_segments): start_state = env.reset(s) keep_going = True real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0) tmp = list(real_instruction.data.cpu()[0].numpy()) real_instruction_str = debug_untokenize_instruction(tmp) write_real_instruction(real_instruction_str) #write_instruction(real_instruction_str) #instruction_str = real_instruction_str image = cuda_var(images[0][s], setup["cuda"], 0) label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0) affine_g_to_s = affines[0][s] print("Your current environment:") with open( "/storage/dxsun/unreal_config_nl/configs/configs/random_config_" + str(env_id) + ".json") as fp: config = json.load(fp) print(config) while keep_going: write_real_instruction(real_instruction_str) while True: cv2.waitKey(200) instruction = read_instruction_file() if instruction == "CMD: Next": print("Advancing") keep_going = False write_empty_instruction() break elif instruction == "CMD: Reset": print("Resetting") env.reset(s) write_empty_instruction() elif len(instruction.split(" ")) > 1: instruction_str = instruction print("Executing: ", instruction_str) break if not keep_going: continue #instruction_str = read_instruction_file() # TODO: Load instruction from file tok_instruction = tokenize_instruction(instruction_str, word2token) instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0) instruction_v = cuda_var(instruction_t, setup["cuda"], 0) instruction_mask = torch.ones_like(instruction_v) tmp = list(instruction_t[0].numpy()) instruction_dbg_str = debug_untokenize_instruction( tmp, token2term) # import matplotlib.pyplot as plt #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy()) #plt.show() res = model(image, instruction_v, instruction_mask) mask_pred = res[0] shp = mask_pred.shape mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp) #mask_pred = softmax2d(mask_pred) # TODO: Rotate the mask_pred to the global frame affine_s_to_g = np.linalg.inv(affine_g_to_s) S = 8.0 affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]]) affine_scale_down = np.linalg.inv(affine_scale_up) affine_pred_to_g = np.dot( affine_scale_down, np.dot(affine_s_to_g, affine_scale_up)) #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float() mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose( 1, 2, 0) mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g, 32, 32) print("Sum of global mask: ", mask_pred_g_np.sum()) mask_pred_g = torch.from_numpy( mask_pred_g_np.transpose(2, 0, 1)).float()[np.newaxis, :, :, :] exec_model.set_ground_truth_visitation_d(mask_pred_g) # Create a batch axis for pytorch #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :]) mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min() mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9) mask_pred_np[:, :, 0] *= 2.0 mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min() mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9) presenter = Presenter() presenter.show_image(mask_pred_g_np, "mask_pred_g", torch=False, waitkey=1, scale=4) #import matplotlib.pyplot as plt #print("image.data shape:", image.data.cpu().numpy().shape) #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy()) #plt.show() # presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4) #import pdb; pdb.set_trace() pred_viz_np = presenter.overlaid_image(image.data, mask_pred_np, channel=0) # TODO: Don't show labels # TODO: OpenCV colours #label_mask_np = p.data.cpu().numpy()[0].transpose(1,2,0) labl_viz_np = presenter.overlaid_image(image.data, label_mask.data, channel=0) viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1) viz_img_np = pred_viz_np viz_img = presenter.overlay_text(viz_img_np, instruction_dbg_str) cv2.imshow("interactive viz", viz_img) cv2.waitKey(100) rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s], seg_idxs[0][s], tok_instruction) write_instruction("")
def get_action(self, state, instruction=None, sample=False, rl_rollout=False): if self.rviz is not None: self.rviz.publish_instruction_text("instruction", debug_untokenize_instruction(instruction)) return self.get_follow_path_action(state.state), {}
def print_tokenized_instruction(self, instruction): instr_str = debug_untokenize_instruction(instruction) print("instruction: " + str(instr_str))
def get_action(self, state, instruction, sample=False, rl_rollout=False): """ Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop) :param state: DroneState object with the raw image from the simulator :param instruction: Tokenized instruction given the corpus :param sample: (Only applies if self.rl): If true, sample action from action distribution. If False, take most likely action. #TODO: Absorb corpus within model :return: """ self.eval() ACTPROF = False actprof = SimpleProfiler(print=ACTPROF, torch_sync=ACTPROF) states, images_fpv = self.states_to_torch(state) first_step = True if instruction == self.prev_instruction: first_step = False if first_step: self.reset() self.start_poses = self.cam_poses_from_states(states) if self.rviz is not None: dbg_instr = "\n".join(Presenter().split_lines( debug_untokenize_instruction(instruction), maxchars=45)) self.rviz.publish_instruction_text("instruction", dbg_instr) self.prev_instruction = instruction self.seq_step += 1 instr_len = [len(instruction)] if instruction is not None else None instructions = torch.LongTensor(instruction).unsqueeze(0) plan_now = self.seq_step % self.s1_params[ "plan_every_n_steps"] == 0 or first_step # Run stage1 visitation prediction # TODO: There's a bug here where we ignore images between planning timesteps. That's why must plan every timestep if plan_now or True: device = next(self.parameters()).device images_fpv = images_fpv.to(device) states = states.to(device) instructions = instructions.to(device) self.start_poses = self.start_poses.to(device) actprof.tick("start") #print("Planning for: " + debug_untokenize_instruction(list(instructions[0].detach().cpu().numpy()))) self.log_v_dist_w, v_dist_w_poses, self.log_goal_oob_score, rl_outputs = self.stage1_visitation_prediction( images_fpv, states, instructions, instr_len, plan=[True], firstseg=[first_step], noisy_start_poses=self.start_poses, start_poses=self.start_poses, select_only=True, rl=True) actprof.tick("stage1") self.map_coverage_w = rl_outputs["map_coverage_w"] self.map_uncoverage_w = rl_outputs["map_uncoverage_w"] self.v_dist_w, self.goal_oob_prob_w = self.visitation_softmax( self.log_v_dist_w, self.log_goal_oob_score) if self.rviz: v_dist_w_np = self.v_dist_w[0].data.cpu().numpy().transpose( 1, 2, 0) # expand to 0-1 range v_dist_w_np[:, :, 0] /= (np.max(v_dist_w_np[:, :, 0]) + 1e-10) v_dist_w_np[:, :, 1] /= (np.max(v_dist_w_np[:, :, 1]) + 1e-10) self.rviz.publish_map("visitation_dist", v_dist_w_np, self.s1_params["world_size_m"]) # Transform to robot reference frame cam_poses = self.cam_poses_from_states(states) # Log-distributions CANNOT be transformed - the transformer fills empty space with zeroes, which makes sense for # probability distributions, but makes no sense for likelihood scores map_coverage_r, _ = self.map_transformer_w_to_r( self.map_coverage_w, None, cam_poses) map_uncoverage_r, _ = self.map_transformer_w_to_r( self.map_uncoverage_w, None, cam_poses) v_dist_r, r_poses = self.map_transformer_w_to_r( self.v_dist_w, None, cam_poses) # Run stage2 action generation if self.rl: actprof.tick("pipes") # If RL, stage 2 outputs distributions over actions (following torch.distributions API) xvel_dist, yawrate_dist, stop_dist, value = self.stage2_action_generation( v_dist_r, map_uncoverage_r, eval=True) actprof.tick("stage2") if sample: xvel, yawrate, stop = self.stage2_action_generation.sample_action( xvel_dist, yawrate_dist, stop_dist) else: xvel, yawrate, stop = self.stage2_action_generation.mode_action( xvel_dist, yawrate_dist, stop_dist) actprof.tick("sample") xvel_logprob, yawrate_logprob, stop_logprob = self.stage2_action_generation.action_logprob( xvel_dist, yawrate_dist, stop_dist, xvel, yawrate, stop) xvel = xvel.detach().cpu().numpy() yawrate = yawrate.detach().cpu().numpy() stop = stop.detach().cpu().numpy() xvel_logprob = xvel_logprob.detach() yawrate_logprob = yawrate_logprob.detach() stop_logprob = stop_logprob.detach() value = value.detach() #.cpu().numpy() # Add an empty column for sideways velocity act = np.concatenate([xvel, np.zeros(xvel.shape), yawrate, stop]) # This will be needed to compute rollout statistics later on #v_dist_w = self.visitation_softmax(self.log_v_dist_w, self.log_goal_oob_score) # Keep all the info we will need later for A2C / PPO training # TODO: We assume independence between velocity and stop distributions. Not true, but what ya gonna do? rl_data = { "policy_input": v_dist_r[0].detach(), "v_dist_w": self.v_dist_w[0].detach(), "policy_input_b": map_uncoverage_r[0].detach(), "value_pred": value[0], "xvel": xvel, "yawrate": yawrate, "stop": stop, "xvel_logprob": xvel_logprob, "yawrate_logprob": yawrate_logprob, "stop_logprob": stop_logprob, "action_logprob": xvel_logprob + stop_logprob + yawrate_logprob } actprof.tick("end") actprof.loop() actprof.print_stats(1) if rl_rollout: return act, rl_data else: return act else: action = self.stage2_action_generation(v_dist_r, firstseg=[first_step], eval=True) output_action = action.squeeze().data.cpu().numpy() stop_prob = output_action[3] output_stop = 1 if stop_prob > self.s2_params[ "stop_threshold"] else 0 output_action[3] = output_stop return output_action
def get_action(self, state, instruction): """ Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop) :param state: DroneState object with the raw image from the simulator :param instruction: Tokenized instruction given the corpus #TODO: Absorb corpus within model :return: """ # TODO: Simplify this self.eval() images_np_pure = state.image state_np = state.state #print("Act: " + debug_untokenize_instruction(instruction)) images_np = standardize_image(images_np_pure) image_fpv = Variable(none_padded_seq_to_tensor([images_np])) state = Variable(none_padded_seq_to_tensor([state_np])) # Add the batch dimension first_step = True if instruction == self.prev_instruction: first_step = False self.prev_instruction = instruction instruction_str = debug_untokenize_instruction(instruction) # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps) if first_step: if self.rviz is not None: self.rviz.publish_instruction_text( "instruction", debug_untokenize_instruction(instruction)) img_in_t = image_fpv img_in_t.volatile = True instr_len = [len(instruction)] if instruction is not None else None instruction = torch.LongTensor(instruction).unsqueeze(0) instruction = cuda_var(instruction, self.is_cuda, self.cuda_device) state.volatile = True if self.is_cuda: if img_in_t is not None: img_in_t = img_in_t.cuda(self.cuda_device) state = state.cuda(self.cuda_device) step_enc = None plan_now = None self.seq_step += 1 action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc) passive_mode_debug_projections = True if passive_mode_debug_projections: self.show_landmark_locations(loop=False, states=state) self.reset() # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions) if self.params.get("run_auxiliaries_at_test_time"): _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store, reduce_average=True) overlaid = self.get_overlaid_classification_results( whole_batch=False) # Save materials for analysis and presentation if self.params["write_figures"]: self.save_viz(images_np_pure, instruction_str) output_action = action.squeeze().data.cpu().numpy() stop_prob = output_action[3] output_stop = 1 if stop_prob > self.params["stop_p"] else 0 output_action[3] = output_stop return output_action
def get_action(self, state, instruction, sample=False, rl_rollout=False): """ Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop) :param state: DroneState object with the raw image from the simulator :param instruction: Tokenized instruction given the corpus :param sample: (Only applies if self.rl): If true, sample action from action distribution. If False, take most likely action. :return: """ self.eval() states, images_fpv = self.states_to_torch(state) first_step = True if instruction == self.prev_instruction: first_step = False if first_step: self.reset() self.start_cam_poses = self.cam_poses_from_states(states) if self.rviz is not None: dbg_instr = "\n".join(Presenter().split_lines( debug_untokenize_instruction(instruction), maxchars=45)) self.rviz.publish_instruction_text("instruction", dbg_instr) self.prev_instruction = instruction self.seq_step += 1 instr_len = [len(instruction)] if instruction is not None else None instructions = torch.LongTensor(instruction).unsqueeze(0) plan_now = self.seq_step % self.s1_params[ "plan_every_n_steps"] == 0 or first_step # Run stage1 visitation prediction # TODO: There's a bug here where we ignore images between planning timesteps. That's why must plan every timestep if plan_now or True: device = next(self.parameters()).device images_fpv = images_fpv.to(device) states = states.to(device) instructions = instructions.to(device) self.start_cam_poses = self.start_cam_poses.cuda(device) self.actprof.tick("start") #print("Planning for: " + debug_untokenize_instruction(list(instructions[0].detach().cpu().numpy()))) self.log_v_dist_w, v_dist_w_poses, rl_outputs = self.stage1_visitation_prediction( images_fpv, states, instructions, instr_len, plan=[True], firstseg=[first_step], noisy_start_poses=self.start_cam_poses, start_poses=self.start_cam_poses, select_only=True, rl=True, noshow=True) self.actprof.tick("stage1") self.map_uncoverage_w = rl_outputs["map_uncoverage_w"] self.v_dist_w = self.log_v_dist_w.softmax() # TODO: Fix if self.rviz: #a.k.a False v_dist_w_np = self.v_dist_w.inner_distribution[0].data.cpu( ).numpy().transpose(1, 2, 0) # expand to 0-1 range v_dist_w_np[:, :, 0] /= (np.max(v_dist_w_np[:, :, 0]) + 1e-10) v_dist_w_np[:, :, 1] /= (np.max(v_dist_w_np[:, :, 1]) + 1e-10) self.rviz.publish_map("visitation_dist", v_dist_w_np, self.s1_params["world_size_m"]) # Transform to robot reference frame drn_poses = self.poses_from_states(states) # Log-distributions CANNOT be transformed - the transformer fills empty space with zeroes, which makes sense for # probability distributions, but makes no sense for likelihood scores x = self.v_dist_w.inner_distribution xr, r_poses = self.map_transformer_w_to_r(x, None, drn_poses) v_dist_r = Partial2DDistribution(xr, self.v_dist_w.outer_prob_mass) structured_map_info_r, map_info_w = self.build_map_structured_input( self.map_uncoverage_w, drn_poses) if self.oracle_stage1: # Ground truth visitation distributions (in start and global frames) assert self.current_segment is not None, "start_segment_rollout must be called before rolling out model that uses ground truth" env_id, set_idx, seg_idx = self.current_segment v_dist_w_gt = aup.resolve_and_get_ground_truth_static_global( env_id, set_idx, seg_idx, self.s1_params["global_map_size"], self.s1_params["world_size_px"]).to(images_fpv.device) v_dist_r_ground_truth_select, poses_r = self.map_transformer_w_to_r( v_dist_w_gt, None, drn_poses) map_uncoverage_r = structured_map_info_r[:, 0, :, :] # PVNv2: Mask the visitation distributions according to observability thus far: if self.s1_params["clip_observability"]: v_dist_r_gt_masked = Partial2DDistribution.from_distribution_and_mask( v_dist_r_ground_truth_select, 1 - map_uncoverage_r) # PVNv1: Have P(oob)=0, and use unmasked ground-truth visitation distributions else: v_dist_r_gt_masked = Partial2DDistribution( v_dist_r_ground_truth_select, torch.zeros_like(v_dist_r_ground_truth_select[:, :, 0, 0])) v_dist_r = v_dist_r_gt_masked if False: Presenter().show_image(structured_map_info_r, "map_struct", scale=4, waitkey=1) v_dist_r.show("v_dist_r", scale=4, waitkey=1) # Run stage2 action generation if self.rl: self.actprof.tick("pipes") # If RL, stage 2 outputs distributions over actions (following torch.distributions API) xvel_dist, yawrate_dist, stop_dist, value = self.stage2_action_generation( v_dist_r, structured_map_info_r, eval=True) self.actprof.tick("stage2") if sample: xvel, yawrate, stop = self.stage2_action_generation.sample_action( xvel_dist, yawrate_dist, stop_dist) else: xvel, yawrate, stop = self.stage2_action_generation.mode_action( xvel_dist, yawrate_dist, stop_dist) self.actprof.tick("sample") xvel_logprob, yawrate_logprob, stop_logprob = self.stage2_action_generation.action_logprob( xvel_dist, yawrate_dist, stop_dist, xvel, yawrate, stop) xvel = xvel.detach().cpu().numpy() yawrate = yawrate.detach().cpu().numpy() stop = stop.detach().cpu().numpy() xvel_logprob = xvel_logprob.detach() yawrate_logprob = yawrate_logprob.detach() stop_logprob = stop_logprob.detach() if value is not None: value = value.detach() #.cpu().numpy() if self.s2_params.get("use_stop_threshold"): stop_prob = stop_dist.probs.detach().item() stop = 1 if stop_prob > self.s2_params.get( "stop_threshold") else 0 stop = np.ones_like(xvel) * stop print("stop prob: ", stop_prob, " -> ", stop) # Add an empty column for sideways velocity act = np.concatenate([xvel, np.zeros(xvel.shape), yawrate, stop]) # This will be needed to compute rollout statistics later on #v_dist_w = self.visitation_softmax(self.log_v_dist_w, self.log_goal_oob_score) # Keep all the info we will need later for A2C / PPO training # TODO: We assume independence between velocity and stop distributions. Not true, but what ya gonna do? rl_data = { "policy_input": v_dist_r.detach(), "policy_input_b": structured_map_info_r[0].detach(), "v_dist_w": self.v_dist_w.inner_distribution[0].detach(), "value_pred": value[0] if value else None, "xvel": xvel, "yawrate": yawrate, "stop": stop, "xvel_logprob": xvel_logprob, "yawrate_logprob": yawrate_logprob, "stop_logprob": stop_logprob, "action_logprob": xvel_logprob + stop_logprob + yawrate_logprob } self.actprof.tick("end") self.actprof.loop() self.actprof.print_stats(1) if rl_rollout: return act, rl_data else: viz_data = self.make_viz_dict_from_stage1_internals() viz_data["v_dist_r_inner"] = v_dist_r.inner_distribution[ 0].detach().cpu().numpy() viz_data["v_dist_r_outer"] = v_dist_r.outer_prob_mass[ 0].detach().cpu().numpy() viz_data["v_dist_w_inner"] = self.v_dist_w.inner_distribution[ 0].detach().cpu().numpy() viz_data["v_dist_w_outer"] = self.v_dist_w.outer_prob_mass[ 0].detach().cpu().numpy() viz_data["map_struct"] = structured_map_info_r[0].detach().cpu( ).numpy() viz_data["BM_W"] = map_info_w[0].detach().cpu().numpy() return act, viz_data else: raise NotImplemented( "Non-RL learning mode no longer support it - just use RL learning mode as it is more general!" )
def get_action(self, state, instruction): """ Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop) :param state: DroneState object with the raw image from the simulator :param instruction: Tokenized instruction given the corpus #TODO: Absorb corpus within model :return: """ # TODO: Simplify this self.eval() images_np_pure = state.image state_np = state.state #print("Act: " + debug_untokenize_instruction(instruction)) images_np = standardize_image(images_np_pure) image_fpv = Variable(none_padded_seq_to_tensor([images_np])) state = Variable(none_padded_seq_to_tensor([state_np])) # Add the batch dimension first_step = True if instruction == self.prev_instruction: first_step = False self.prev_instruction = instruction instruction_str = debug_untokenize_instruction(instruction) # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps) if first_step: if self.rviz is not None: self.rviz.publish_instruction_text( "instruction", debug_untokenize_instruction(instruction)) #if first_step: # say(debug_untokenize_instruction(instruction)) img_in_t = image_fpv img_in_t.volatile = True instr_len = [len(instruction)] if instruction is not None else None instruction = torch.LongTensor(instruction).unsqueeze(0) instruction = cuda_var(instruction, self.is_cuda, self.cuda_device) state.volatile = True if self.is_cuda: if img_in_t is not None: img_in_t = img_in_t.cuda(self.cuda_device) state = state.cuda(self.cuda_device) step_enc = None plan_now = None self.seq_step += 1 action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc) # Save materials for analysis and presentation if self.params["write_figures"]: self.save_viz(images_np_pure, instruction_str) output_action = action.squeeze().data.cpu().numpy() stop_prob = output_action[3] print(f"P(STOP): {stop_prob}") output_stop = 1 if stop_prob > self.params["stop_p"] else 0 output_action[3] = output_stop return output_action