Code Example #1
File: fpv_to_global_map.py Project: hyzcn/drif
def runtest_fpv_to_global_map():
    img_to_map = FPVToGlobalMap(source_map_size=32,
                                world_size_px=32,
                                world_size=30,
                                img_w=256,
                                img_h=144,
                                res_channels=3,
                                map_channels=3,
                                img_dbg=True)

    import pickle
    import cv2
    with open(test_data_path(), "rb") as fp:
        test_data = pickle.load(fp)

    for i in range(len(test_data["images"])):
        image = test_data["images"][i]
        pose = test_data["cam_poses"][i]

        cv2.imshow("fpv_image", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        cv2.waitKey(1)

        image = standardize_image(image)
        image_t = Variable(torch.from_numpy(image))
        pose_t = pose.to_torch().to_var()
        pose_t = Pose(pose_t.position.unsqueeze(0),
                      pose_t.orientation.unsqueeze(0))
        image_t = image_t.unsqueeze(0)

        projected, poses = img_to_map(image_t, pose_t, None, show="yes")
        print("Ding")
        print("globalish poses: ", poses)
Code Example #2
File: top_down_dataset.py Project: pianpwk/drif
    def get_top_down_image_env(self, env_id, egocentric=False):
        """
        To be called externally to retrieve a top-down environment image oriented with the start of the requested segment
        :param env_id:  environment id
        :return:
        """
        path = load_path(env_id)
        env_image_in = load_env_img(env_id, self.map_w, self.map_h)

        # If we need to return a bigger image resolution than we loaded
        if self.map_w != self.img_w or self.map_h != self.img_h:
            env_image = np.zeros(
                [self.img_h, self.img_w, env_image_in.shape[2]])
            env_image[0:self.map_h, 0:self.map_w, :] = env_image_in
        else:
            env_image = env_image_in

        #path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]])
        #self.plot_path_on_img(env_image, path_img)

        env_image = standardize_image(env_image)
        env_img_t = torch.from_numpy(env_image).unsqueeze(0).float()
        #presenter = Presenter()
        #presenter.show_image(env_img_t[0], "data_img", torch=True, scale=1)
        return env_img_t
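When the requested output resolution is larger than the loaded map, the code above pads the map into the top-left corner of a zero canvas rather than resizing it. A minimal stand-alone illustration of that padding step (the sizes are made-up example values):

import numpy as np

map_h, map_w = 64, 64        # size of the loaded environment image (example values)
img_h, img_w = 96, 96        # requested output size (example values)
env_image_in = np.ones([map_h, map_w, 3])

env_image = np.zeros([img_h, img_w, env_image_in.shape[2]])
env_image[0:map_h, 0:map_w, :] = env_image_in   # map content fills the top-left corner; the rest stays zero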
Code Example #3
File: fpv_image_dataset.py Project: pianpwk/drif
    def __getitem__(self, index):
        prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        prof.tick("out")
        if type(index) == int:
            image = self.images[index]
            lm_pos_fpv = self.lm_pos_fpv[index]
            lm_indices = self.lm_idx[index]
            lm_pos_map = self.lm_pos_map[index]
            prof.tick("retrieve data")

            # Data augmentation (skipped when self.eval is True)
            out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(
                image, lm_indices, lm_pos_fpv, self.img_h, self.img_w, self.eval, prof)
            # If augmentation dropped every landmark, fall back to the eval-mode (un-augmented) path
            if out_lm_indices is None or len(out_lm_indices) == 0:
                out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(
                    image, lm_indices, lm_pos_fpv, self.img_h, self.img_w, True, prof)

            out_img = standardize_image(np.array(out_img))
            out_img = torch.from_numpy(out_img)

            out_lm_indices = torch.tensor(out_lm_indices)
            out_lm_pos_fpv = torch.tensor(out_lm_pos_fpv)

            sample = {"poses": self.poses[index],
                      "instructions": [],  # self.instructions[index],
                      "images": out_img,
                      "env_ids": self.env_ids_decompressed[index],
                      "lm_pos_fpv": out_lm_pos_fpv,
                      "lm_indices": out_lm_indices,
                      "lm_pos_map": lm_pos_map}
            prof.tick("dic")
            prof.print_stats()

        """
        elif type(index) == list:
            out_images_list, out_lm_indices_list, out_lm_pos_fpv_list = [], [], []
            for i in index:
                image = self.images[i]
                lm_pos_fpv = self.lm_pos_fpv[i]
                lm_indices = self.lm_idx[i]

                out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(image, lm_indices, lm_pos_fpv, IMG_HEIGHT, IMG_WIDTH, self.eval, prof)

                if (len(out_lm_indices) == 0) | (out_lm_indices is None):
                    out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(image, lm_indices, lm_pos_fpv, IMG_HEIGHT, IMG_WIDTH, True, prof)

                out_images_list.append(out_img)
                out_lm_indices_list.append(out_lm_indices)
                out_lm_pos_fpv_list.append(out_lm_pos_fpv)

            sample = {"poses": [self.poses[i] for i in index],
                      "instructions": [],  # self.instructions[index],
                      "lm_mentioned": [],
                      "images": out_images_list,
                      "env_ids": [self.env_ids_decompressed[i] for i in index],
                      "lm_pos_fpv": out_lm_pos_fpv_list,
                      "lm_idx": out_lm_indices_list}
        """
        return sample
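Note that the sample returned above mixes tensors with Python objects and variable-length landmark lists, so PyTorch's default collate function cannot stack these samples into a batch. A minimal sketch of a loader that sidesteps that, assuming the dataset instance is built from the project's own configuration (make_loader is a hypothetical helper name):

from torch.utils.data import DataLoader

def make_loader(dataset, batch_size=4):
    # Identity collate: a batch is simply a list of the per-sample dicts returned by
    # __getitem__, which avoids stacking variable-length landmark tensors.
    return DataLoader(dataset, batch_size=batch_size, shuffle=True,
                      collate_fn=lambda samples: samples)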
Code Example #4
File: model_sm_rss_global.py Project: dxsun/drif
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

        # Save materials for paper and presentation
        if False:
            self.save_viz(images_np_pure)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > 0.5 else 0
        output_action[3] = output_stop

        return output_action
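The Variable wrapper and the .volatile flags used throughout these get_action examples come from the pre-0.4 PyTorch API; on current PyTorch versions volatile no longer exists and inference is wrapped in torch.no_grad() instead. A rough sketch of the modern equivalent, written as a hypothetical helper rather than a drop-in replacement (none_padded_seq_to_tensor is the project helper used above, and the plan/pos_enc keyword arguments from the example are omitted):

import torch

def prepare_and_act(model, images_np, state_np, instruction_t, instr_len):
    # Hypothetical helper: torch.no_grad() takes over the role of the old .volatile = True flags.
    with torch.no_grad():
        img_in_t = none_padded_seq_to_tensor([images_np])
        state_t = none_padded_seq_to_tensor([state_np])
        return model(img_in_t, state_t, instruction_t, instr_len)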
Code Example #5
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        for tok in instruction:
            if tok >= self.params["vocab_size"] or tok < 0:
                raise Exception("Word embeddings out of bounds")
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            img_in_t = img_in_t.cuda(self.cuda_device)

        self.seq_step += 1

        action = self(img_in_t, instruction, instr_len)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if (stop_prob > 0.5
                            or self.seq_step >= self.trajectory_len - 5) else 0
        output_action[3] = output_stop

        #print("action: ", output_action)

        return output_action
Code Example #6
def gen_top_down_image(env_top_down_image, affine, img_w, img_h, map_w, map_h):
    #top_down_image = load_env_img(env_id)
    # TODO: Check for overflowz
    seg_img = env_top_down_image.copy()
    seg_img_rot = apply_affine(seg_img, affine, img_w, img_h)

    if DEBUG:
        cv2.imshow("rot_top", seg_img_rot)
        cv2.waitKey(10)

    #self.latest_rot_img_dbg = seg_img_rot

    seg_img_rot = standardize_image(seg_img_rot)
    seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()

    return seg_img_t
Code Example #7
File: model_gs_fpv_mem.py Project: pianpwk/drif
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        self.seq_step += 1

        action = self(img_in_t, state, instruction, instr_len)

        output_action = action.squeeze().data.cpu().numpy()
        print("action: ", output_action)

        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
        output_action[3] = output_stop

        return output_action
Code Example #8
File: top_down_dataset_sm.py Project: dxsun/drif
    def __getitem__(self, idx):
        if self.seg_level:
            env_id = self.seg_list[idx][0]
            set_idx = self.seg_list[idx][1]
            seg_idx = self.seg_list[idx][2]
        else:
            env_id = self.env_list[idx]

        print("top_down_dataset_sm __getitem__ load_env_config")
        env_conf_json = load_env_config(env_id)
        landmark_names, landmark_indices, landmark_positions = get_landmark_locations_airsim(env_conf_json)

        top_down_image = load_env_img(env_id)

        path = load_path(env_id)

        img_x = top_down_image.shape[0]
        img_y = top_down_image.shape[1]

        path_in_img_coords = self.cf_to_img(img_x, path)
        landmark_pos_in_img = self.as_to_img(img_x, np.asarray(landmark_positions)[:, 0:2])
        self.pos_rand_image = self.pos_rand_range * img_x

        #self.plot_path_on_img(top_down_image, path_in_img_coords)
        #self.plot_path_on_img(top_down_image, landmark_pos_in_img)
        #cv2.imshow("top_down", top_down_image)
        #cv2.waitKey()

        input_images = []
        input_instructions = []
        label_images = []
        aux_labels = []

        # Somehow load the instruction with the start and end indices for each of the N segments
        if self.seg_level:
            instruction_segments = [self.all_instr[env_id][set_idx]["instructions"][seg_idx]]
        else:
            instruction_segments = self.all_instr[env_id][0]["instructions"]

        for seg_idx, seg in enumerate(instruction_segments):
            start_idx = seg["start_idx"]
            end_idx = seg["end_idx"]
            instruction = seg["instruction"]

            # TODO: Check for overflowz
            seg_path = path_in_img_coords[start_idx:end_idx]
            seg_img = top_down_image.copy()

            #test_plot = self.plot_path_on_img(seg_img, seg_path)
            # TODO: Validate the 0.5 choice, should it be 2?
            affine, cropsize = self.get_affine_matrix(seg_path, 0, [int(img_x / 2), int(img_y / 2)], 0.5)
            if affine is None:
                continue
            seg_img_rot = self.apply_affine(seg_img, affine, cropsize)

            seg_labels = np.zeros_like(seg_img[:, :, 0:1]).astype(float)
            seg_labels = self.plot_path_on_img(seg_labels, seg_path)
            seg_labels = gaussian_filter(seg_labels, 4)
            seg_labels_rot = self.apply_affine(seg_labels, affine, cropsize)

            #seg_labels_rot = gaussian_filter(seg_labels_rot, 4)
            seg_labels_rot = self.normalize_0_1(seg_labels_rot)

            # Change to true to visualize the paths / labels
            if False:
                cv2.imshow("rot_img", seg_img_rot)
                cv2.imshow("seg_labels", seg_labels_rot)
                rot_viz = seg_img_rot.astype(np.float64) / 512
                rot_viz[:, :, 0] += seg_labels_rot.squeeze()
                cv2.imshow("rot_viz", rot_viz)
                cv2.waitKey(0)

            tok_instruction = tokenize_instruction(instruction, self.word2token)
            instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)

            # Get landmark classification labels
            landmark_pos_in_seg_img = self.apply_affine_on_pts(landmark_pos_in_img, affine)

            # Down-size images and labels if requested by the model
            if self.img_scale != 1.0:
                seg_img_rot = transform.resize(
                    seg_img_rot,
                    [seg_img_rot.shape[0] * self.img_scale,
                     seg_img_rot.shape[1] * self.img_scale], mode="constant")
                seg_labels_rot = transform.resize(
                    seg_labels_rot,
                    [seg_labels_rot.shape[0] * self.img_scale,
                     seg_labels_rot.shape[1] * self.img_scale], mode="constant")
                landmark_pos_in_seg_img = landmark_pos_in_seg_img * self.img_scale

            seg_img_rot = standardize_image(seg_img_rot)
            seg_labels_rot = standardize_image(seg_labels_rot)
            seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
            seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float()

            landmark_pos_t = torch.from_numpy(landmark_pos_in_seg_img).unsqueeze(0)
            landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(0)

            mask1 = torch.gt(landmark_pos_t, 0)
            mask2 = torch.lt(landmark_pos_t, seg_img_t.size(2))
            mask = mask1 * mask2
            mask = mask[:, :, 0] * mask[:, :, 1]

            landmark_pos_t = torch.masked_select(landmark_pos_t, mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
            landmark_indices_t = torch.masked_select(landmark_indices_t, mask).view([-1])

            mentioned_names, mentioned_indices = get_mentioned_landmarks(self.thesaurus, instruction)
            mentioned_labels_t = empty_float_tensor(list(landmark_indices_t.size())).long()
            for i, landmark_idx_present in enumerate(landmark_indices_t):
                if landmark_idx_present in mentioned_indices:
                    mentioned_labels_t[i] = 1

            aux_label = {
                "landmark_pos": landmark_pos_t,
                "landmark_indices": landmark_indices_t,
                "landmark_mentioned": mentioned_labels_t,
                "visible_mask": mask,
            }

            if self.include_instr_negatives:
                # If we are to be using similar instructions according to the json file, then
                # initialize choices with similar instructions. Otherwise let choices be empty, and they will
                # be filled in the following lines.
                if self.instr_negatives_similar_only:
                    choices = self.similar_instruction_map[str(env_id)][str(seg_idx)]
                else:
                    choices = []
                # If there are no similar instructions to this instruction, pick a completely random instruction
                if len(choices) == 0:
                    while len(choices) == 0:
                        env_options = list(self.similar_instruction_map.keys())
                        random_env = random.choice(env_options)
                        seg_options = list(self.similar_instruction_map[random_env].keys())
                        if len(seg_options) == 0:
                            continue
                        random_seg = random.choice(seg_options)
                        choices = self.similar_instruction_map[random_env][random_seg]

                pick = random.choice(choices)
                picked_env = pick["env_id"]
                picked_seg = pick["seg_idx"]
                picked_set = pick["set_idx"]
                picked_instruction = self.all_instr[picked_env][picked_set]["instructions"][picked_seg]["instruction"]
                tok_fake_instruction = tokenize_instruction(picked_instruction, self.word2token)
                aux_label["negative_instruction"] = torch.LongTensor(tok_fake_instruction).unsqueeze(0)

            input_images.append(seg_img_t)
            input_instructions.append(instruction_t)
            label_images.append(seg_labels_t)
            aux_labels.append(aux_label)

        return [input_images, input_instructions, label_images, aux_labels]
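The mask construction in the loop above keeps only the landmarks whose projected coordinates land inside the rotated image before masked_select flattens the survivors back into an (N, 2) tensor. A small self-contained illustration of that filtering pattern on current PyTorch (the coordinate values are made up):

import torch

landmark_pos = torch.tensor([[[5.0, 12.0], [-3.0, 40.0], [100.0, 20.0]]])   # (1, 3, 2)
img_size = 64

in_bounds = (landmark_pos > 0) & (landmark_pos < img_size)    # per-coordinate bounds check
mask = in_bounds[:, :, 0] & in_bounds[:, :, 1]                # both x and y must be inside the image
kept = torch.masked_select(landmark_pos, mask.unsqueeze(2).expand_as(landmark_pos)).view(-1, 2)
print(kept)   # tensor([[ 5., 12.]]) -- only the first landmark survives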
Code Example #9
File: model_gsmn_bidomain.py Project: pianpwk/drif
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        passive_mode_debug_projections = True
        if passive_mode_debug_projections:
            self.show_landmark_locations(loop=False, states=state)
            self.reset()

        # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
        if self.params.get("run_auxiliaries_at_test_time"):
            _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store,
                                                      reduce_average=True)
            overlaid = self.get_overlaid_classification_results(
                whole_batch=False)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action
Code Example #10
File: map_affine_tests.py Project: hyzcn/drif
def map_affine_test():
    img = load_env_img(2, 128, 128)
    img = standardize_image(img)
    img = torch.from_numpy(img).float().unsqueeze(0)

    pos = np.asarray([15, 15, 0])
    quat = euler.euler2quat(0, 0, 0)
    pose0 = Pose(pos[np.newaxis, :], quat[np.newaxis, :])

    theta1 = 0.5
    pos = np.asarray([15, 15, 0])
    quat = euler.euler2quat(0, 0, theta1)
    pose1 = Pose(pos[np.newaxis, :], quat[np.newaxis, :])

    D = 10.0
    pos = np.asarray([15 + D * math.cos(theta1), 15 + D * math.sin(theta1), 0])
    quat = euler.euler2quat(0, 0, theta1)
    pose2 = Pose(pos[np.newaxis, :], quat[np.newaxis, :])

    affine = MapAffine(128, 128, 128)
    res1 = affine(img, pose0, pose1)
    res2 = affine(res1, pose1, pose2)
    res3 = affine(img, pose0, pose2)

    prof = SimpleProfiler(torch_sync=True, print=True)
    affinebig = MapAffine(128, 256, 128)
    prof.tick("init")
    res3big = affinebig(img, pose0, pose2)
    prof.tick("affinebig")

    img = load_env_img(2, 32, 32)
    img = standardize_image(img)
    img = torch.from_numpy(img).float().unsqueeze(0).cuda()
    affines = MapAffine(32, 64, 32).cuda()
    torch.cuda.synchronize()
    prof.tick("init")
    res3s = affines(img, pose0, pose2)
    prof.tick("affines")

    prof.print_stats()

    print("Start pose: ", pose0)
    print("    Pose 1: ", pose1)
    print("    Pose 2: ", pose2)

    print("Res2, Res3 and Res3Big should align!")

    Presenter().show_image(img[0], "img", torch=True, waitkey=False, scale=2)
    Presenter().show_image(res1.data[0],
                           "res_1",
                           torch=True,
                           waitkey=False,
                           scale=2)
    Presenter().show_image(res2.data[0],
                           "res_2",
                           torch=True,
                           waitkey=False,
                           scale=2)
    Presenter().show_image(res3.data[0],
                           "res_3",
                           torch=True,
                           waitkey=False,
                           scale=2)
    Presenter().show_image(res3big.data[0],
                           "res3big",
                           torch=True,
                           waitkey=True,
                           scale=2)
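The test exercises the composition property of MapAffine: warping from pose0 to pose1 and then from pose1 to pose2 should reproduce the direct pose0-to-pose2 warp, which is why res2, res3 and res3big are expected to align. If a numeric check is wanted in addition to the visual one, a hedged sketch (the interior crop and the printed metric are arbitrary choices, and border pixels will differ because the chained warp resamples twice):

# Compare the chained warp (res2) against the direct warp (res3) away from the image borders.
inner2 = res2.data[0, :, 16:-16, 16:-16]
inner3 = res3.data[0, :, 16:-16, 16:-16]
print("max abs difference:", float((inner2 - inner3).abs().max()))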
Code Example #11
File: map_affine_tests.py Project: hyzcn/drif
def affine_2d_test():

    img = load_env_img(2, 128, 128)
    img = standardize_image(img)
    img = torch.from_numpy(img).float().unsqueeze(0)

    px = 64
    py = 64
    theta = 0.5

    c = math.cos(theta)
    s = math.sin(theta)

    t_p = torch.FloatTensor([[1, 0, px], [0, 1, py], [0, 0, 1]]).unsqueeze(0)

    t_r = torch.FloatTensor([[c, -s, 0], [s, c, 0], [0, 0, 1]]).unsqueeze(0)

    mat_np = np.dot(t_p.squeeze().numpy(), t_r.squeeze().numpy())
    mat_np_t = torch.from_numpy(mat_np).unsqueeze(0)

    # For some forsaken reason rightmultiplying seems to mean applying the transformation second
    mat = torch.bmm(t_p, t_r)
    #mat1 = t_p
    #mat2 = t_r

    affine_2d = Affine2D()

    res1 = affine_2d(Variable(img), Variable(t_r))

    res2 = affine_2d(res1, Variable(t_p))

    res3 = affine_2d(img, Variable(mat))

    res4 = affine_2d(img, Variable(mat_np_t))

    res3_big = affine_2d(img, Variable(mat), out_size=[512, 512])

    res3_small = affine_2d(img, Variable(mat), out_size=[128, 128])

    Presenter().show_image(res1.data[0],
                           "res_1",
                           torch=True,
                           waitkey=False,
                           scale=4)
    Presenter().show_image(res2.data[0],
                           "res_2",
                           torch=True,
                           waitkey=False,
                           scale=4)
    Presenter().show_image(res3.data[0],
                           "res_3",
                           torch=True,
                           waitkey=False,
                           scale=4)
    Presenter().show_image(res3_big.data[0],
                           "res3_big",
                           torch=True,
                           waitkey=False,
                           scale=4)
    Presenter().show_image(res3_small.data[0],
                           "res3_small",
                           torch=True,
                           waitkey=False,
                           scale=4)
    Presenter().show_image(res4.data[0],
                           "res_4",
                           torch=True,
                           waitkey=True,
                           scale=4)

    print("res2 should be the same as res_3 and res_4")
Code Example #12
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))
        #if first_step:
        #    say(debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        print(f"P(STOP): {stop_prob}")
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action