Esempio n. 1
0
    def save_instruction(self, instruction, filename, torch=False, folder=""):
        if torch:
            instruction = debug_untokenize_instruction(instruction)

        if folder != "":
            os.makedirs(folder, exist_ok=True)
        with open(os.path.join(folder, filename), "w") as fp:
            fp.write(instruction)
Esempio n. 2
0
 def get_viz(self):
     presenter = Presenter()
     out = {"viz_img": []}
     for i, img in enumerate(self.viz_images):
         instruction = self.instructions[i]
         if len(instruction.view([-1])) < 2:
             instruction = [0]
         else:
             instruction = list(instruction.data.cpu().numpy().squeeze())
         instruction_str = debug_untokenize_instruction(instruction)
         viz_img = presenter.overlay_text(img, instruction_str)
         out["viz_img"].append(viz_img)
     return out
Esempio n. 3
0
def train_top_down_pred():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **train_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            with open(
                    "/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                    + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)
            while keep_going:
                write_real_instruction(real_instruction_str)

                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str,
                                                       word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(
                    tmp, token2term)

                # import matplotlib.pyplot as plt
                #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy())
                #plt.show()

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                affine_pred_to_g = np.dot(
                    affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0,
                                             1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Create a batch axis for pytorch
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                #import matplotlib.pyplot as plt
                #print("image.data shape:", image.data.cpu().numpy().shape)
                #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy())
                #plt.show()
                # presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import pdb; pdb.set_trace()
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                #label_mask_np = p.data.cpu().numpy()[0].transpose(1,2,0)
                labl_viz_np = presenter.overlaid_image(image.data,
                                                       label_mask.data,
                                                       channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np

                viz_img = presenter.overlay_text(viz_img_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
Esempio n. 4
0
 def get_action(self, state, instruction=None, sample=False, rl_rollout=False):
     if self.rviz is not None:
         self.rviz.publish_instruction_text("instruction", debug_untokenize_instruction(instruction))
     return self.get_follow_path_action(state.state), {}
Esempio n. 5
0
 def print_tokenized_instruction(self, instruction):
     instr_str = debug_untokenize_instruction(instruction)
     print("instruction: " + str(instr_str))
Esempio n. 6
0
    def get_action(self, state, instruction, sample=False, rl_rollout=False):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        :param sample: (Only applies if self.rl): If true, sample action from action distribution. If False, take most likely action.
        #TODO: Absorb corpus within model
        :return:
        """
        self.eval()

        ACTPROF = False
        actprof = SimpleProfiler(print=ACTPROF, torch_sync=ACTPROF)

        states, images_fpv = self.states_to_torch(state)

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        if first_step:
            self.reset()
            self.start_poses = self.cam_poses_from_states(states)
            if self.rviz is not None:
                dbg_instr = "\n".join(Presenter().split_lines(
                    debug_untokenize_instruction(instruction), maxchars=45))
                self.rviz.publish_instruction_text("instruction", dbg_instr)

        self.prev_instruction = instruction
        self.seq_step += 1

        instr_len = [len(instruction)] if instruction is not None else None
        instructions = torch.LongTensor(instruction).unsqueeze(0)
        plan_now = self.seq_step % self.s1_params[
            "plan_every_n_steps"] == 0 or first_step

        # Run stage1 visitation prediction
        # TODO: There's a bug here where we ignore images between planning timesteps. That's why must plan every timestep
        if plan_now or True:
            device = next(self.parameters()).device
            images_fpv = images_fpv.to(device)
            states = states.to(device)
            instructions = instructions.to(device)
            self.start_poses = self.start_poses.to(device)

            actprof.tick("start")
            #print("Planning for: " + debug_untokenize_instruction(list(instructions[0].detach().cpu().numpy())))
            self.log_v_dist_w, v_dist_w_poses, self.log_goal_oob_score, rl_outputs = self.stage1_visitation_prediction(
                images_fpv,
                states,
                instructions,
                instr_len,
                plan=[True],
                firstseg=[first_step],
                noisy_start_poses=self.start_poses,
                start_poses=self.start_poses,
                select_only=True,
                rl=True)
            actprof.tick("stage1")

            self.map_coverage_w = rl_outputs["map_coverage_w"]
            self.map_uncoverage_w = rl_outputs["map_uncoverage_w"]
            self.v_dist_w, self.goal_oob_prob_w = self.visitation_softmax(
                self.log_v_dist_w, self.log_goal_oob_score)
            if self.rviz:
                v_dist_w_np = self.v_dist_w[0].data.cpu().numpy().transpose(
                    1, 2, 0)
                # expand to 0-1 range
                v_dist_w_np[:, :, 0] /= (np.max(v_dist_w_np[:, :, 0]) + 1e-10)
                v_dist_w_np[:, :, 1] /= (np.max(v_dist_w_np[:, :, 1]) + 1e-10)
                self.rviz.publish_map("visitation_dist", v_dist_w_np,
                                      self.s1_params["world_size_m"])

        # Transform to robot reference frame
        cam_poses = self.cam_poses_from_states(states)
        # Log-distributions CANNOT be transformed - the transformer fills empty space with zeroes, which makes sense for
        # probability distributions, but makes no sense for likelihood scores
        map_coverage_r, _ = self.map_transformer_w_to_r(
            self.map_coverage_w, None, cam_poses)
        map_uncoverage_r, _ = self.map_transformer_w_to_r(
            self.map_uncoverage_w, None, cam_poses)
        v_dist_r, r_poses = self.map_transformer_w_to_r(
            self.v_dist_w, None, cam_poses)

        # Run stage2 action generation
        if self.rl:
            actprof.tick("pipes")
            # If RL, stage 2 outputs distributions over actions (following torch.distributions API)
            xvel_dist, yawrate_dist, stop_dist, value = self.stage2_action_generation(
                v_dist_r, map_uncoverage_r, eval=True)

            actprof.tick("stage2")
            if sample:
                xvel, yawrate, stop = self.stage2_action_generation.sample_action(
                    xvel_dist, yawrate_dist, stop_dist)
            else:
                xvel, yawrate, stop = self.stage2_action_generation.mode_action(
                    xvel_dist, yawrate_dist, stop_dist)

            actprof.tick("sample")
            xvel_logprob, yawrate_logprob, stop_logprob = self.stage2_action_generation.action_logprob(
                xvel_dist, yawrate_dist, stop_dist, xvel, yawrate, stop)

            xvel = xvel.detach().cpu().numpy()
            yawrate = yawrate.detach().cpu().numpy()
            stop = stop.detach().cpu().numpy()
            xvel_logprob = xvel_logprob.detach()
            yawrate_logprob = yawrate_logprob.detach()
            stop_logprob = stop_logprob.detach()
            value = value.detach()  #.cpu().numpy()

            # Add an empty column for sideways velocity
            act = np.concatenate([xvel, np.zeros(xvel.shape), yawrate, stop])

            # This will be needed to compute rollout statistics later on
            #v_dist_w = self.visitation_softmax(self.log_v_dist_w, self.log_goal_oob_score)

            # Keep all the info we will need later for A2C / PPO training
            # TODO: We assume independence between velocity and stop distributions. Not true, but what ya gonna do?
            rl_data = {
                "policy_input": v_dist_r[0].detach(),
                "v_dist_w": self.v_dist_w[0].detach(),
                "policy_input_b": map_uncoverage_r[0].detach(),
                "value_pred": value[0],
                "xvel": xvel,
                "yawrate": yawrate,
                "stop": stop,
                "xvel_logprob": xvel_logprob,
                "yawrate_logprob": yawrate_logprob,
                "stop_logprob": stop_logprob,
                "action_logprob": xvel_logprob + stop_logprob + yawrate_logprob
            }
            actprof.tick("end")
            actprof.loop()
            actprof.print_stats(1)
            if rl_rollout:
                return act, rl_data
            else:
                return act

        else:
            action = self.stage2_action_generation(v_dist_r,
                                                   firstseg=[first_step],
                                                   eval=True)
            output_action = action.squeeze().data.cpu().numpy()
            stop_prob = output_action[3]
            output_stop = 1 if stop_prob > self.s2_params[
                "stop_threshold"] else 0
            output_action[3] = output_stop

            return output_action
Esempio n. 7
0
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        passive_mode_debug_projections = True
        if passive_mode_debug_projections:
            self.show_landmark_locations(loop=False, states=state)
            self.reset()

        # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
        if self.params.get("run_auxiliaries_at_test_time"):
            _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store,
                                                      reduce_average=True)
            overlaid = self.get_overlaid_classification_results(
                whole_batch=False)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action
Esempio n. 8
0
    def get_action(self, state, instruction, sample=False, rl_rollout=False):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        :param sample: (Only applies if self.rl): If true, sample action from action distribution. If False, take most likely action.
        :return:
        """
        self.eval()

        states, images_fpv = self.states_to_torch(state)

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        if first_step:
            self.reset()
            self.start_cam_poses = self.cam_poses_from_states(states)
            if self.rviz is not None:
                dbg_instr = "\n".join(Presenter().split_lines(
                    debug_untokenize_instruction(instruction), maxchars=45))
                self.rviz.publish_instruction_text("instruction", dbg_instr)

        self.prev_instruction = instruction
        self.seq_step += 1

        instr_len = [len(instruction)] if instruction is not None else None
        instructions = torch.LongTensor(instruction).unsqueeze(0)
        plan_now = self.seq_step % self.s1_params[
            "plan_every_n_steps"] == 0 or first_step

        # Run stage1 visitation prediction
        # TODO: There's a bug here where we ignore images between planning timesteps. That's why must plan every timestep
        if plan_now or True:
            device = next(self.parameters()).device
            images_fpv = images_fpv.to(device)
            states = states.to(device)
            instructions = instructions.to(device)
            self.start_cam_poses = self.start_cam_poses.cuda(device)

            self.actprof.tick("start")
            #print("Planning for: " + debug_untokenize_instruction(list(instructions[0].detach().cpu().numpy())))
            self.log_v_dist_w, v_dist_w_poses, rl_outputs = self.stage1_visitation_prediction(
                images_fpv,
                states,
                instructions,
                instr_len,
                plan=[True],
                firstseg=[first_step],
                noisy_start_poses=self.start_cam_poses,
                start_poses=self.start_cam_poses,
                select_only=True,
                rl=True,
                noshow=True)
            self.actprof.tick("stage1")
            self.map_uncoverage_w = rl_outputs["map_uncoverage_w"]
            self.v_dist_w = self.log_v_dist_w.softmax()
            # TODO: Fix
            if self.rviz:  #a.k.a False
                v_dist_w_np = self.v_dist_w.inner_distribution[0].data.cpu(
                ).numpy().transpose(1, 2, 0)
                # expand to 0-1 range
                v_dist_w_np[:, :, 0] /= (np.max(v_dist_w_np[:, :, 0]) + 1e-10)
                v_dist_w_np[:, :, 1] /= (np.max(v_dist_w_np[:, :, 1]) + 1e-10)
                self.rviz.publish_map("visitation_dist", v_dist_w_np,
                                      self.s1_params["world_size_m"])

        # Transform to robot reference frame
        drn_poses = self.poses_from_states(states)
        # Log-distributions CANNOT be transformed - the transformer fills empty space with zeroes, which makes sense for
        # probability distributions, but makes no sense for likelihood scores
        x = self.v_dist_w.inner_distribution
        xr, r_poses = self.map_transformer_w_to_r(x, None, drn_poses)
        v_dist_r = Partial2DDistribution(xr, self.v_dist_w.outer_prob_mass)

        structured_map_info_r, map_info_w = self.build_map_structured_input(
            self.map_uncoverage_w, drn_poses)

        if self.oracle_stage1:
            # Ground truth visitation distributions (in start and global frames)
            assert self.current_segment is not None, "start_segment_rollout must be called before rolling out model that uses ground truth"
            env_id, set_idx, seg_idx = self.current_segment
            v_dist_w_gt = aup.resolve_and_get_ground_truth_static_global(
                env_id, set_idx, seg_idx, self.s1_params["global_map_size"],
                self.s1_params["world_size_px"]).to(images_fpv.device)
            v_dist_r_ground_truth_select, poses_r = self.map_transformer_w_to_r(
                v_dist_w_gt, None, drn_poses)
            map_uncoverage_r = structured_map_info_r[:, 0, :, :]
            # PVNv2: Mask the visitation distributions according to observability thus far:
            if self.s1_params["clip_observability"]:
                v_dist_r_gt_masked = Partial2DDistribution.from_distribution_and_mask(
                    v_dist_r_ground_truth_select, 1 - map_uncoverage_r)
            # PVNv1: Have P(oob)=0, and use unmasked ground-truth visitation distributions
            else:
                v_dist_r_gt_masked = Partial2DDistribution(
                    v_dist_r_ground_truth_select,
                    torch.zeros_like(v_dist_r_ground_truth_select[:, :, 0, 0]))
            v_dist_r = v_dist_r_gt_masked

        if False:
            Presenter().show_image(structured_map_info_r,
                                   "map_struct",
                                   scale=4,
                                   waitkey=1)
            v_dist_r.show("v_dist_r", scale=4, waitkey=1)

        # Run stage2 action generation
        if self.rl:
            self.actprof.tick("pipes")
            # If RL, stage 2 outputs distributions over actions (following torch.distributions API)
            xvel_dist, yawrate_dist, stop_dist, value = self.stage2_action_generation(
                v_dist_r, structured_map_info_r, eval=True)

            self.actprof.tick("stage2")
            if sample:
                xvel, yawrate, stop = self.stage2_action_generation.sample_action(
                    xvel_dist, yawrate_dist, stop_dist)
            else:
                xvel, yawrate, stop = self.stage2_action_generation.mode_action(
                    xvel_dist, yawrate_dist, stop_dist)

            self.actprof.tick("sample")
            xvel_logprob, yawrate_logprob, stop_logprob = self.stage2_action_generation.action_logprob(
                xvel_dist, yawrate_dist, stop_dist, xvel, yawrate, stop)

            xvel = xvel.detach().cpu().numpy()
            yawrate = yawrate.detach().cpu().numpy()
            stop = stop.detach().cpu().numpy()
            xvel_logprob = xvel_logprob.detach()
            yawrate_logprob = yawrate_logprob.detach()
            stop_logprob = stop_logprob.detach()
            if value is not None:
                value = value.detach()  #.cpu().numpy()

            if self.s2_params.get("use_stop_threshold"):
                stop_prob = stop_dist.probs.detach().item()
                stop = 1 if stop_prob > self.s2_params.get(
                    "stop_threshold") else 0
                stop = np.ones_like(xvel) * stop
                print("stop prob: ", stop_prob, " -> ", stop)

            # Add an empty column for sideways velocity
            act = np.concatenate([xvel, np.zeros(xvel.shape), yawrate, stop])
            # This will be needed to compute rollout statistics later on
            #v_dist_w = self.visitation_softmax(self.log_v_dist_w, self.log_goal_oob_score)

            # Keep all the info we will need later for A2C / PPO training
            # TODO: We assume independence between velocity and stop distributions. Not true, but what ya gonna do?
            rl_data = {
                "policy_input": v_dist_r.detach(),
                "policy_input_b": structured_map_info_r[0].detach(),
                "v_dist_w": self.v_dist_w.inner_distribution[0].detach(),
                "value_pred": value[0] if value else None,
                "xvel": xvel,
                "yawrate": yawrate,
                "stop": stop,
                "xvel_logprob": xvel_logprob,
                "yawrate_logprob": yawrate_logprob,
                "stop_logprob": stop_logprob,
                "action_logprob": xvel_logprob + stop_logprob + yawrate_logprob
            }
            self.actprof.tick("end")
            self.actprof.loop()
            self.actprof.print_stats(1)
            if rl_rollout:
                return act, rl_data
            else:
                viz_data = self.make_viz_dict_from_stage1_internals()
                viz_data["v_dist_r_inner"] = v_dist_r.inner_distribution[
                    0].detach().cpu().numpy()
                viz_data["v_dist_r_outer"] = v_dist_r.outer_prob_mass[
                    0].detach().cpu().numpy()
                viz_data["v_dist_w_inner"] = self.v_dist_w.inner_distribution[
                    0].detach().cpu().numpy()
                viz_data["v_dist_w_outer"] = self.v_dist_w.outer_prob_mass[
                    0].detach().cpu().numpy()
                viz_data["map_struct"] = structured_map_info_r[0].detach().cpu(
                ).numpy()
                viz_data["BM_W"] = map_info_w[0].detach().cpu().numpy()
                return act, viz_data

        else:
            raise NotImplemented(
                "Non-RL learning mode no longer support it - just use RL learning mode as it is more general!"
            )
Esempio n. 9
0
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))
        #if first_step:
        #    say(debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        print(f"P(STOP): {stop_prob}")
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action