def generate_semantic_layers(env: habitat.RLEnv, mapper: Mapper, M: int, mapper_config: CN, global_seen_map_np: torch.Tensor):
    grid_size = mapper_config.TASK.GT_EGO_MAP.MAP_SCALE
    start_y = env.habitat_env.sim.get_agent_state().position[1]
    
    # init the semantic layers 
    semantic_layers = []
    scene = env.habitat_env.sim.semantic_scene
    obj_pos = {}
    for name in label_idx.keys():
        obj_pos[name] = {obj.id : [obj.aabb.center,obj.aabb.sizes]
                            for obj in scene.objects
                            if name == obj.category.name()}

    for category in tqdm.tqdm(obj_pos, desc='semantic map', position=3):
        if category == 'chair':
            obj_mapping = np.zeros((M, M))
            gc.collect()
            for obj_id, (center, sizes) in obj_pos[category].items():
                pos = pos_real_to_map(center, env.habitat_env.current_episode)
                obj_height = center[1]
                # Keep only objects on the current floor: the floor centre is taken to be
                # 1 metre above the start position, and objects within a 1 metre band around it are accepted.
                if np.abs(start_y - obj_height + 1) > 1:
                    continue

                edges_w = max(int(sizes[0] / grid_size), 1)
                edges_l = max(int(sizes[2] / grid_size), 1)
                trans_obj = asnumpy(map_ego_to_global(mapper, edges_w, edges_l, torch.Tensor(pos).unsqueeze(0), M).squeeze())

                obj_mapping = (obj_mapping + trans_obj) > 0

            reduced_obj_layer = obj_mapping
            semantic_layers.append(reduced_obj_layer)
    return np.array(semantic_layers)
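A minimal, self-contained illustration of the per-object rasterization this function performs: the sketch below paints an axis-aligned bounding-box footprint onto an M x M grid with pure NumPy. The helper rasterize_footprint is hypothetical and only stands in for the map_ego_to_global call above, which is not shown here.

import numpy as np

def rasterize_footprint(center_xy, size_wl, grid_size, M):
    """Paint an axis-aligned object footprint onto an M x M boolean grid.

    center_xy: object centre in map cells (x, y); size_wl: (width, length) in metres.
    """
    layer = np.zeros((M, M), dtype=bool)
    half_w = max(int(size_wl[0] / grid_size), 1) // 2
    half_l = max(int(size_wl[1] / grid_size), 1) // 2
    cx, cy = int(center_xy[0]), int(center_xy[1])
    x0, x1 = np.clip([cx - half_w, cx + half_w + 1], 0, M)
    y0, y1 = np.clip([cy - half_l, cy + half_l + 1], 0, M)
    layer[y0:y1, x0:x1] = True
    return layer

# Example: a 0.6 m x 0.8 m chair centred at cell (240, 260) on a 481-cell map with 5 cm cells
chair_layer = rasterize_footprint((240, 260), (0.6, 0.8), 0.05, 481)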
Example 2
    def _load_transformed_wall_maps(self, scene_map_info, episode):
        seen_maps = []
        wall_maps = []
        start_position = episode.start_position  # (X, Y, Z)
        start_rotation = quaternion_xyzw_to_wxyz(episode.start_rotation)
        start_heading = compute_heading_from_quaternion(start_rotation)
        for floor_data in scene_map_info:
            seen_map = np.load(floor_data["seen_map_path"])
            wall_map = np.load(floor_data["wall_map_path"])
            # ===== Transform the maps relative to the episode start pose =====
            map_view_position = floor_data["world_position"]
            map_view_heading = floor_data["world_heading"]
            # Originally, Z is downward and X is rightward.
            # Convert it to X upward and Y rightward
            x_map, y_map = -map_view_position[2], map_view_position[0]
            theta_map = map_view_heading
            x_start, y_start = -start_position[2], start_position[0]
            theta_start = start_heading
            # Compute relative coordinates
            r_rel = math.sqrt((x_start - x_map) ** 2 + (y_start - y_map) ** 2)
            phi_rel = math.atan2(y_start - y_map, x_start - x_map) - theta_map
            x_rel = r_rel * math.cos(phi_rel) / self.config.MAP_SCALE
            y_rel = r_rel * math.sin(phi_rel) / self.config.MAP_SCALE
            theta_rel = theta_start - theta_map
            # Convert these to image coordinates with X being rightward and Y
            # being downward
            x_img_rel = y_rel
            y_img_rel = -x_rel
            theta_img_rel = theta_rel
            x_trans = torch.Tensor([[x_img_rel, y_img_rel, theta_img_rel]])
            # Perform the transformations
            p_seen_map = rearrange(torch.Tensor(seen_map), "h w c -> () c h w")
            p_wall_map = rearrange(torch.Tensor(wall_map), "h w c -> () c h w")
            p_seen_map_trans = spatial_transform_map(p_seen_map, x_trans)
            p_wall_map_trans = spatial_transform_map(p_wall_map, x_trans)
            seen_map_trans = asnumpy(p_seen_map_trans)
            seen_map_trans = rearrange(seen_map_trans, "() c h w -> h w c")
            wall_map_trans = asnumpy(p_wall_map_trans)
            wall_map_trans = rearrange(wall_map_trans, "() c h w -> h w c")
            seen_maps.append(seen_map_trans)
            wall_maps.append(wall_map_trans)

        return seen_maps, wall_maps
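The frame conversions above (world frame with X upward and Y rightward, then image frame with X rightward and Y downward) can be collapsed into one small helper. This is only an illustrative restatement of the same arithmetic, not part of the original class:

import math

def map_origin_in_start_frame(x_map, y_map, theta_map, x_start, y_start, theta_start, map_scale):
    """Express a stored map's origin pose relative to the episode start pose,
    returning (x_img_rel, y_img_rel, theta_rel) in image coordinates."""
    r_rel = math.sqrt((x_start - x_map) ** 2 + (y_start - y_map) ** 2)
    phi_rel = math.atan2(y_start - y_map, x_start - x_map) - theta_map
    x_rel = r_rel * math.cos(phi_rel) / map_scale
    y_rel = r_rel * math.sin(phi_rel) / map_scale
    theta_rel = theta_start - theta_map
    # Image coordinates: X rightward, Y downward
    return y_rel, -x_rel, theta_rel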
Example 3
def test_chainer_layer():
    chainer_is_present = any(
        'chainer' in backend.framework_name for backend in collect_test_backends(symbolic=False, layers=True)
    )
    if chainer_is_present:
        # checked that chainer is present
        import chainer
        import chainer.links as L
        import chainer.functions as F
        from einops.layers.chainer import Rearrange, Reduce, EinMix
        from einops import asnumpy
        import numpy as np

        def create_model():
            return chainer.Sequential(
                L.Convolution2D(3, 6, ksize=(5, 5)),
                Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
                L.Convolution2D(6, 16, ksize=(5, 5)),
                Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
                Rearrange('b c h w -> b (c h w)'),
                L.Linear(16 * 5 * 5, 120),
                L.Linear(120, 84),
                F.relu,
                EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
                EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
                L.Linear(84, 10),
            )

        model1 = create_model()
        model2 = create_model()
        x = np.random.normal(size=[10, 3, 32, 32]).astype('float32')
        x = chainer.Variable(x)
        assert not np.allclose(asnumpy(model1(x)), asnumpy(model2(x)))

        with tempfile.TemporaryDirectory() as dir:
            filename = f'{dir}/file.npz'
            chainer.serializers.save_npz(filename, model1)
            chainer.serializers.load_npz(filename, model2)

        assert np.allclose(asnumpy(model1(x)), asnumpy(model2(x)))
Example 4
def test_gluon_layer():
    gluon_is_present = any(
        'mxnet' in backend.framework_name for backend in collect_test_backends(symbolic=False, layers=True)
    )
    if gluon_is_present:
        # checked that gluon is present
        import mxnet
        import numpy
        from mxnet.gluon.nn import HybridSequential, Dense, Conv2D, LeakyReLU
        from einops.layers.gluon import Rearrange, Reduce, EinMix
        from einops import asnumpy

        def create_model():
            model = HybridSequential()
            layers = [
                Conv2D(6, kernel_size=5),
                Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
                Conv2D(16, kernel_size=5),
                Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
                Rearrange('b c h w -> b (c h w)'),
                Dense(120),
                LeakyReLU(alpha=0.0),
                Dense(84),
                LeakyReLU(alpha=0.0),
                Dense(10),
            ]
            for layer in layers:
                model.add(layer)
            model.initialize(mxnet.init.Xavier(), ctx=mxnet.cpu())
            return model

        model1 = create_model()
        model2 = create_model()
        x = mxnet.ndarray.random_normal(shape=[10, 3, 32, 32])
        assert not numpy.allclose(asnumpy(model1(x)), asnumpy(model2(x)))

        with tempfile.NamedTemporaryFile(mode='r+b') as fp:
            model1.save_parameters(fp.name)
            model2.load_parameters(fp.name)

        assert numpy.allclose(asnumpy(model1(x)), asnumpy(model2(x)))

        # testing with symbolic (NB with fixed dimensions!)
        input = mxnet.sym.Variable('data', shape=x.shape)
        json = model1(input).tojson()
        model3 = mxnet.gluon.SymbolBlock(outputs=mxnet.sym.load_json(json), inputs=input)
        model4 = mxnet.gluon.SymbolBlock(outputs=mxnet.sym.load_json(json), inputs=input)
        model3.initialize(ctx=mxnet.cpu())
        model3(x)

        with tempfile.NamedTemporaryFile(mode='r+b') as fp:
            model3.save_parameters(fp.name)
            model4.load_parameters(fp.name)
        assert numpy.allclose(asnumpy(model3(x)), asnumpy(model4(x)))

        try:
            model1.hybridize(static_alloc=True, static_shape=True)
            model1(x)
        except:
            # hybridization is not supported
            pass
Example 5
def unprocess_image(img):
    """Undo imagenet normalization to a batch of images."""
    # img - (bs, C, H, W)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    img_unproc = np.copy(asnumpy(img))
    img_unproc[:, 0] = img_unproc[:, 0] * std[0] + mean[0]
    img_unproc[:, 1] = img_unproc[:, 1] * std[1] + mean[1]
    img_unproc[:, 2] = img_unproc[:, 2] * std[2] + mean[2]

    img_unproc = np.clip(img_unproc, 0.0, 1.0) * 255.0
    img_unproc = img_unproc.astype(np.uint8)
    img_unproc = rearrange(img_unproc, "b c h w -> b h w c")

    return img_unproc
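A quick round-trip sketch of how unprocess_image is meant to be used; the inline normalization below is an assumption about how the images were preprocessed upstream, mirroring the mean/std constants inside the function:

import numpy as np

mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

img_uint8 = np.random.randint(0, 256, size=(2, 3, 64, 64), dtype=np.uint8)  # (bs, C, H, W)
img_norm = (img_uint8.astype(np.float32) / 255.0 - mean) / std              # ImageNet-normalized
img_back = unprocess_image(img_norm)                                        # (2, 64, 64, 3) uint8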
Example 6
    def forward(self, all_voting_maps_):
        """Each visually similar viewpoint votes for the location of the
        target viewpoint in the top-down view. These voting maps are then
        summed and the final pose is estimated by taking the location with the
        highest overall vote.

        Inputs:
            all_voting_maps_ - (k, bs, 1, mh, mw)
        Outputs:
            pose             - (bs, num_poses) tensor
            mask_maxmin      - (bs, ) tensor
        """
        bs = all_voting_maps_.shape[1]
        # ======================== Aggregate votes ============================
        all_voting_maps = all_voting_maps_.sum(dim=0)  # (bs, 1, mh, mw)
        # Add small non-zero votes to all locations
        all_voting_maps = all_voting_maps + 1e-9
        voting_sum = reduce(all_voting_maps, "b c h w -> b () () ()", "sum")
        voting_map = all_voting_maps / voting_sum  # (bs, 1, mh, mw)
        # =================== Compute argmax locations ========================
        device = voting_map.device
        mh, mw = voting_map.shape[2], voting_map.shape[3]
        midx, midy = mw // 2, mh // 2
        voting_map_flat = rearrange(asnumpy(voting_map[:, 0]), "b h w -> b (h w)")
        predy, predx = np.unravel_index(np.argmax(voting_map_flat, axis=1), (mh, mw))
        predy = (torch.Tensor(predy).to(device).float() - midy) * self.map_scale
        predx = (torch.Tensor(predx).to(device).float() - midx) * self.map_scale
        predr = torch.norm(torch.stack([predx, predy], dim=1), dim=1)
        predt = torch.atan2(predy, predx)
        # Handle edge-case where no votes were cast. When no votes are cast,
        # the pose prediction is set to zeros by default.
        diff_maxmin = np.max(voting_map_flat, axis=1) - np.min(
            voting_map_flat, axis=1
        )  # (bs, )
        mask_maxmin = (torch.Tensor(diff_maxmin) == 0).to(device)
        predr[mask_maxmin] = 0
        predt[mask_maxmin] = 0
        # Converts predicted pose to required format.
        pose = process_pose(torch.stack([predr, predt], dim=1))
        return pose, mask_maxmin
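The heart of the aggregation above is an argmax over the summed voting map, converted to a polar pose about the map centre. A NumPy-only sketch of that decoding step for a single map (map_scale in metres per cell) could look like this:

import numpy as np

def decode_vote_map(voting_map, map_scale):
    """voting_map: (mh, mw) array of accumulated votes -> (r, theta) pose estimate."""
    mh, mw = voting_map.shape
    midx, midy = mw // 2, mh // 2
    py, px = np.unravel_index(np.argmax(voting_map), (mh, mw))
    dy = (py - midy) * map_scale
    dx = (px - midx) * map_scale
    return np.hypot(dx, dy), np.arctan2(dy, dx)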
def get_eval_map(env: ExpRLEnv, trainer: SemAntExpTrainer, M: int, depth_projection_net: DepthProjectionNet, step: int, objectName: str):
    """Given the environment and the configuration, compute the global
    top-down wall and seen area maps by sampling maps for individual locations
    along a uniform grid in the environment, and registering them.

    step (m): the distance between two measurement points
    """
    # Initialize a global map for the episode
    scene_name = env.habitat_env.current_episode.scene_id.split('/')[-1].split('.')[0]
    dirPath = './data/debug/data/' + objectName + "/" + scene_name + '_' + env.habitat_env.current_episode.episode_id + '/' + "record/"
    safe_mkdir(dirPath)
    mapper = trainer.mapper
    config = trainer.config.TASK_CONFIG 
    device = trainer.device

    scene = env.habitat_env.sim.semantic_scene
    obj_pos = {obj.id: [obj.aabb.center, obj.aabb.sizes]
               for obj in scene.objects
               if objectName == obj.category.name()}

    with open(dirPath + 'obj_pos.pkl', 'wb') as f:
        pickle.dump(obj_pos, f)
    objectIndexes = [int(key.split('_')[-1]) for key in obj_pos.keys()]

    global_wall_map = torch.zeros(1, 2, M, M).to(device)
    global_seen_map = torch.zeros(1, 2, M, M).to(device)
    global_object_map = torch.zeros(1, 2, M, M).to(device)
    global_se_map = torch.zeros(1, 2, M, M).to(device)
    global_se_filtered_map = torch.zeros(1, 2, M, M).to(device)
    global_se_seen_map = torch.zeros(1, 2, M, M).to(device)
    global_se_map_mit = torch.zeros(1, 2, M, M).to(device)
    global_se_map_gt = torch.zeros(1, 2, M, M).to(device)

    grid_size = config.TASK.GT_EGO_MAP.MAP_SCALE
    coordinate_max = maps.COORDINATE_MAX
    coordinate_min = maps.COORDINATE_MIN
    resolution = (coordinate_max - coordinate_min) / grid_size
    grid_resolution = (int(resolution), int(resolution))

    top_down_map = maps.get_topdown_map(
        env.habitat_env.sim, grid_resolution, 20000, draw_border=False,
    )

    map_w, map_h = top_down_map.shape

    intervals = (max(int(step / grid_size), 1), max(int(step / grid_size), 1))
    x_vals = np.arange(0, map_w, intervals[0], dtype=int)
    y_vals = np.arange(0, map_h, intervals[1], dtype=int)
    coors = np.stack(np.meshgrid(x_vals, y_vals), axis=2)  # (H, W, 2)
    coors = coors.reshape(-1, 2)  # (H*W, 2)
    map_vals = top_down_map[coors[:, 0], coors[:, 1]]
    valid_coors = coors[map_vals > 0]

    real_x_vals = coordinate_max - valid_coors[:, 0] * grid_size
    real_z_vals = coordinate_min + valid_coors[:, 1] * grid_size
    start_y = env.habitat_env.sim.get_agent_state().position[1]
    index = 0
    records = []

    totalOffsetX = np.random.uniform(-1.5, 1.5)
    totalOffsetY = np.random.uniform(-1.5, 1.5)
    for j in tqdm.tqdm(range(real_x_vals.shape[0]), desc='occupancy map', position=2):
        for theta in np.arange(-np.pi, np.pi, np.pi):
            index += 1

            randomAngle = np.random.uniform(-np.pi,np.pi)
            randomX = np.random.uniform(-0.5, 0.5)
            randomY = np.random.uniform(-0.5, 0.5)

            position = [
                real_x_vals[j].item() + randomX + totalOffsetX,
                start_y.item(),
                real_z_vals[j].item() + randomY + totalOffsetY,
            ]
            rotation = [
                0.0,
                np.sin((theta + randomAngle) / 2).item(),
                0.0,
                np.cos((theta + randomAngle) / 2).item(),
            ]

            sim_obs = env.habitat_env.sim.get_observations_at(
                position, rotation, keep_agent_at_new_pose=True,
            )
            episode = env.habitat_env.current_episode
            obs, _, _, _ = env.step(action={'action': 1})
            batch = batch_obs([obs], device=trainer.device)

            semantic = batch["semantic"]
            mask = torch.zeros_like(semantic,dtype=bool)
            for objectIndex in objectIndexes:
                mask ^= (semantic == objectIndex)
            mask = rearrange(mask, "b h w  -> b h w ()")
            batch["object_mask_gt"] = mask

            ego_map_gt_b = depth_projection_net(
                rearrange(batch["depth"], "b h w c -> b c h w")
            )
            # Push non-object pixels to a far depth (100 m) so only the object projects onto the map
            object_depth = batch["depth"] * (mask > 0) + (mask == 0) * 100

            ego_map_gt_object = depth_projection_net(rearrange(object_depth, "b h w c -> b c h w"))
            batch["ego_map_gt_object"] = ego_map_gt_object
            # begin MIT segmentation
            img_data = pil_to_tensor(rearrange(batch["rgb"], "b h w c -> b c h w")[0] / 255) 
            singleton_batch = {'img_data': img_data[None]}
            output_size = img_data.shape[1:]
            with torch.no_grad():
                scores = segmentation_module(singleton_batch, segSize=output_size)
            _, pred = torch.max(scores, dim=1)
            # visualize_result(pred)
            # end MIT segmentation
            batch["mit"] = pred

            # Build the object mask from the MIT segmentation class indices
            if objectName == "chair":
                mask = (pred == 19) ^ (pred == 30) ^ (pred == 75)
            elif objectName == "bed":
                mask = (pred == 7)
            elif objectName == "table":
                mask = (pred == 15)
            else:
                mask = pred > 0
            
            mask = rearrange(mask, "b h w -> b h w ()")
            batch["object_mask_mit"] = mask
            object_depth2 = batch["depth"] * (mask > 0) + (mask == 0) * 100
            ego_map_gt_object2 = depth_projection_net(rearrange(object_depth2, "b h w c -> b c h w"))
            batch["ego_map_mit_object"] = ego_map_gt_object2
            ego_map_gt_anti = transpose_image(batch["ego_map_gt_anticipated"]).to(trainer.device)


            pu_inputs_t = {
                "rgb": process_image(batch["rgb"], trainer.mapper.img_mean_t, trainer.mapper.img_std_t).to(trainer.device),
                "depth": transpose_image(batch["depth"]).to(trainer.device),
                "ego_map_gt": transpose_image(
                        rearrange(ego_map_gt_b, "b c h w -> b h w c")
                    ).to(trainer.device),
                "ego_map_gt_anticipated": ego_map_gt_anti,
            }
            pu_inputs = trainer.mapper._safe_cat(pu_inputs_t, pu_inputs_t)

            torch.save(batch, dirPath + 'pu_inputs_t_'+str(index)+'.pt')

            pu_output = trainer.mapper.projection_unit(pu_inputs)
            se_map = asnumpy(pu_output['sem_estimate'])[0,:,:,:]

            ego_map_gt_b_np =  asnumpy(ego_map_gt_b[0,...])
            se_seen = ego_map_gt_b_np*se_map
            dilation_mask = np.ones((100, 100))

            current_mask = cv2.dilate(
                se_seen.astype(np.float32), dilation_mask, iterations=1,
            ).astype(np.float32)

            se_filtered_map = se_map
            se_seen_map = torch.Tensor(se_seen).to(device)
            se_seen_map = rearrange(se_seen_map, "c h w -> () c h w")
            se_seen_map = (se_seen_map[:,0])[None,:]

            se_map = torch.Tensor(se_map).to(device)
            se_map = rearrange(se_map, "c h w -> () c h w")

            se_filtered_map = torch.Tensor(se_filtered_map).to(device)
            se_filtered_map = rearrange(se_filtered_map, "c h w -> () c h w")

            torch.save(se_filtered_map, dirPath + 'se_filtered_map_'+str(index)+'.pt')

            ego_map_gt = torch.Tensor(obs["ego_map_gt"]).to(device)
            ego_map_gt = rearrange(ego_map_gt, "h w c -> () c h w")

            ego_wall_map_gt = torch.Tensor(obs["ego_wall_map_gt"]).to(device)
            ego_wall_map_gt = rearrange(ego_wall_map_gt, "h w c -> () c h w")



            pose_gt = torch.Tensor(obs["pose_gt"]).unsqueeze(0).to(device)

            global_se_seen_map = mapper.ext_register_map(
                global_se_seen_map, se_seen_map, pose_gt
            )

            global_se_map_gt = mapper.ext_register_map(
                global_se_map_gt, ego_map_gt_anti, pose_gt
            )

            global_seen_map = mapper.ext_register_map(
                global_seen_map, ego_map_gt_b, pose_gt
            )

            global_object_map = mapper.ext_register_map(
                global_object_map, ego_map_gt_object, pose_gt
            )

            global_wall_map = mapper.ext_register_map(
                global_wall_map, ego_wall_map_gt, pose_gt
            )
            global_se_map = mapper.ext_register_map(
                global_se_map, se_map, pose_gt
            )

            global_se_filtered_map = mapper.ext_register_map(
                global_se_filtered_map, se_filtered_map, pose_gt
            )

            global_se_map_mit = mapper.ext_register_map(
                global_se_map_mit, ego_map_gt_object2, pose_gt
            )
            se_filtered_map = None
            batch = None
            se_map = None
            ego_map_gt = None
            ego_wall_map_gt = None
            obs = None
            pu_inputs_t = None
            pu_output=None
            _ = None
            pu_inputs=None
            gc.collect()
            torch.cuda.empty_cache()

    mask = dilate_tensor(global_seen_map * ((1 - global_wall_map) > 0) * global_se_map, (51, 51))

    global_se_filtered_map = global_se_filtered_map * mask
    global_se_seen_map = global_se_seen_map * mask

    mask = dilate_tensor(global_seen_map * ((1 - global_wall_map) > 0) * global_object_map, (51, 51))
    global_se_map_gt = global_se_map_gt * mask

    global_wall_map_np = asnumpy(rearrange(global_wall_map, "b c h w -> b h w c")[0])
    global_seen_map_np = asnumpy(rearrange(global_seen_map, "b c h w -> b h w c")[0])
    global_se_map_np = asnumpy(rearrange(global_se_map, "b c h w -> b h w c")[0])
    global_se_filtered_map_np = asnumpy(rearrange(global_se_filtered_map, "b c h w -> b h w c")[0])
    global_se_map_mit_np = asnumpy(rearrange(global_se_map_mit, "b c h w -> b h w c")[0])
    global_se_map_gt_np = asnumpy(rearrange(global_se_map_gt, "b c h w -> b h w c")[0])
    global_se_seen_map_np = asnumpy(rearrange(global_se_seen_map, "b c h w -> b h w c")[0])
    global_object_map_np = asnumpy(rearrange(global_object_map, "b c h w -> b h w c")[0])

    return (
        global_seen_map_np,
        global_wall_map_np,
        global_se_map_np,
        global_se_filtered_map_np,
        global_se_map_mit_np,
        global_se_map_gt_np,
        global_se_seen_map_np,
        global_object_map_np,
    )
        safe_mkdir(mapPath)
        save_episode_info(env, mapPath + "epInfo")
        for key in mapsDic.keys():
            np.save(mapPath + key, mapsDic[key])
            
        dataPath = './data/debug/data/' + objectName + "/" + scene_name + '_' + env.habitat_env.current_episode.episode_id + '/' + "record/"
        imgPath = './data/debug/img/' + objectName + "/" + scene_name + '_' + env.habitat_env.current_episode.episode_id +"/" + "ep" + str(i) + '/'
        file = open(dataPath + "obj_pos.pkl",'rb')
        obj_pos = pickle.load(file)
        filelist = os.listdir(dataPath)

        for id in range(len(filelist) // 2 ):
            idx = id+1
            se_filtered_map = torch.load(dataPath + 'se_filtered_map_'+str(idx)+'.pt')
            pu_inputs_t = torch.load(dataPath + 'pu_inputs_t_'+str(idx)+'.pt')
            rbg_numpy = asnumpy(pu_inputs_t["rgb"][0])/255
            semantic_numpy = asnumpy(pu_inputs_t["semantic"][0]) % 40
            indexes = np.unique(semantic_numpy)
            if pu_inputs_t["ego_map_gt_anticipated"][0,:,:,0].sum() > 0:
                safe_mkdir(imgPath + str(idx))
                visualize_result(pu_inputs_t['mit'],imgPath, colors)
                plt.imsave(imgPath + str(idx)+'/predict_'+ str(idx) + '.png', asnumpy(se_filtered_map[0,0]))
                plt.imsave(imgPath + str(idx)+'/ego_map_gt_anticipated_'+ str(idx) + '.png', asnumpy(pu_inputs_t["ego_map_gt_anticipated"][0,:,:,0]))
                plt.imsave(imgPath + str(idx)+'/rbg_image_'+ str(idx) + '.png', rbg_numpy)
                plt.imsave(imgPath + str(idx)+'/ego_map_gt_'+ str(idx) + '.png', asnumpy(pu_inputs_t["ego_map_gt"][0,:,:,0]))
                plt.imsave(imgPath + str(idx)+'/depth_image_'+ str(idx) + '.png', asnumpy(np.squeeze(pu_inputs_t["depth"][0,:,:,0])))
                plt.imsave(imgPath + str(idx)+'/object_mask_'+ str(idx) + '.png', asnumpy(np.squeeze(pu_inputs_t["object_mask_gt"][0,:,:,0])))      
                plt.imsave(imgPath + str(idx)+'/ego_map_ob_gt_'+ str(idx) + '.png', asnumpy(np.squeeze(pu_inputs_t["ego_map_gt_object"][0,0,:,:]))) 
                plt.imsave(imgPath + str(idx)+'/ego_map_ob_mit_'+ str(idx) + '.png', asnumpy(np.squeeze(pu_inputs_t["ego_map_mit_object"][0,0,:,:]))) 

        safe_mkdir('./data/debug/data/' + objectName + '/' + scene_name + '_' + env.habitat_env.current_episode.episode_id + '/iouscene/')
Example 9
    def _compute_plans_and_local_goals(
            self, global_map, agent_map_xy, SAMPLE_LOCAL_GOAL_FLAGS,
    ):
        """
        global_map - (bs, 2, V, V) tensor
        agent_map_xy - (bs, 2) agent's current position on the map
        """
        s = self.map_scale
        goal_map_xy = self.states["curr_global_goals"]
        plans_xy = self._compute_plans(
            global_map,
            agent_map_xy,
            goal_map_xy,
            SAMPLE_LOCAL_GOAL_FLAGS,
            cache_map=True,
            crop_map_flag=self.config.crop_map_for_planning,
        )
        # ========================= Sample a local goal =======================
        # Sample a local goal and measure the shortest path distance to that
        # goal according to the current map.
        # Update step counts for sample_random_explored calls
        self.states["sample_random_explored_timer"] += 1
        # Pick a local goal to reach with the local policy
        for i in range(self.nplanners):
            if SAMPLE_LOCAL_GOAL_FLAGS[i] != 1:
                continue
            path_x, path_y = plans_xy[i]
            # If planning failed, sample random local goal.
            if path_x is None:
                # Note: This is an expensive call, especially when the map is messy
                # and planning keeps failing. Call sample_random_explored only after
                # so many steps elapsed since the last call.
                if self.config.recovery_heuristic == "random_explored_towards_goal":
                    if self.states["sample_random_explored_timer"][i].item() > 10:
                        goal_x, goal_y = self._sample_random_explored_towards_goal(
                            global_map[i],
                            asnumpy(agent_map_xy[i]).tolist(),
                            asnumpy(goal_map_xy[i]).tolist(),
                            s,
                        )
                        # Reset count
                        self.states["sample_random_explored_timer"][i] = 0
                    else:
                        goal_x, goal_y = self._sample_random_towards_goal(
                            global_map[i],
                            asnumpy(agent_map_xy[i]).tolist(),
                            asnumpy(goal_map_xy[i]).tolist(),
                            s,
                        )
                        goal_x, goal_y = asnumpy(agent_map_xy[i]).tolist()
                elif self.config.recovery_heuristic == "random_explored":
                    if self.states["sample_random_explored_timer"][i].item() > 10:
                        goal_x, goal_y = self._sample_random_explored(
                            global_map[i], asnumpy(agent_map_xy[i]).tolist(), s
                        )
                        # Reset count
                        self.states["sample_random_explored_timer"][i] = 0
                    else:
                        goal_x, goal_y = self._sample_random_near_agent(
                            global_map[i], asnumpy(agent_map_xy[i]).tolist(), s
                        )
                        # goal_x, goal_y = asnumpy(agent_map_xy[i]).tolist()
                else:
                    raise ValueError
                # When planning fails, default to euclidean distance
                curr_x, curr_y = agent_map_xy[i].tolist()
                splength = (
                    math.sqrt((goal_x - curr_x) ** 2 + (goal_y - curr_y) ** 2) * s
                )
            else:
                dl = min(int(self.planning_step_mts / s), len(path_x) - 1)
                # The path is in reverse order
                goal_x, goal_y = path_x[-dl], path_y[-dl]
                sp_xy = np.array([path_x[-dl:], path_y[-dl:]]).T  # (dl, 2)
                splength = (
                    np.linalg.norm(sp_xy[:-1] - sp_xy[1:], axis=1).sum().item() * s
                )
            # Ensure goals are within map bounds
            goal_x = np.clip(goal_x, 0, global_map[i].shape[-1] - 1).item()
            goal_y = np.clip(goal_y, 0, global_map[i].shape[-2] - 1).item()
            # Set the local goals as well as the corresponding path length
            # measures
            self.states["curr_local_goals"][i, 0] = goal_x
            self.states["curr_local_goals"][i, 1] = goal_y
            self.states["local_shortest_path_length"][i] = splength
            self.states["local_path_length"][i] = 0.0
Example 10
    def _compute_gt_local_action(
            self, global_map, agent_world_xyt, goal_world_xyt, M, s
    ):
        """
        Estimate the shortest-path action from agent position to goal position.
        """
        agent_map_xy = convert_world2map(agent_world_xyt, (M, M), s)
        goal_map_xy = convert_world2map(goal_world_xyt, (M, M), s)
        # ============ Crop a region covering agent_map_xy and goal_map_xy ============
        abs_delta_xy = torch.abs(agent_map_xy - goal_map_xy)
        S = max(int(torch.max(abs_delta_xy).item()), 80)
        old_center_xy = (agent_map_xy + goal_map_xy) // 2  # (bs, 2)
        cropped_global_map = crop_map(global_map, old_center_xy, int(S))
        global_map_np = self._process_maps(cropped_global_map)  # (bs, M, M)
        new_center_xy = torch.zeros_like(old_center_xy)
        new_center_xy[:, 0] = global_map_np.shape[2] // 2
        new_center_xy[:, 1] = global_map_np.shape[1] // 2
        # ================ Transform points to new coordinate system ==================
        new_agent_map_xy = agent_map_xy + new_center_xy - old_center_xy
        new_goal_map_xy = goal_map_xy + new_center_xy - old_center_xy
        map_xy_np = asnumpy(new_agent_map_xy).astype(np.int32)  # (bs, 2)
        goal_xy_np = asnumpy(new_goal_map_xy).astype(np.int32)
        # Ensure that points do not go outside map limits
        map_W = global_map_np.shape[2]
        map_H = global_map_np.shape[1]
        map_xy_np[:, 0] = np.clip(map_xy_np[:, 0], 0, map_W - 1)
        map_xy_np[:, 1] = np.clip(map_xy_np[:, 1], 0, map_H - 1)
        goal_xy_np[:, 0] = np.clip(goal_xy_np[:, 0], 0, map_W - 1)
        goal_xy_np[:, 1] = np.clip(goal_xy_np[:, 1], 0, map_H - 1)
        sample_flag = [1.0 for _ in range(self.nplanners)]
        # Compute plan
        plans_xy = self.planner.plan(global_map_np, map_xy_np, goal_xy_np, sample_flag)
        # ===================== Sample an action to a nearby point ====================
        # 0 is forward, 1 is left, 2 is right
        gt_actions = []
        forward_step = self.config.LOCAL_POLICY.AGENT_DYNAMICS.forward_step
        turn_angle = math.radians(self.config.LOCAL_POLICY.AGENT_DYNAMICS.turn_angle)
        for i in range(self.nplanners):
            path_x, path_y = plans_xy[i]
            # If planning failed, sample random action
            if path_x is None:
                gt_actions.append(random.randint(0, 2))
                continue
            # Plan to navigate to a point 0.5 meters away
            dl = min(int(0.5 / s), len(path_x) - 1)
            # The path is in reverse order
            goal_x, goal_y = path_x[-dl], path_y[-dl]
            agent_x, agent_y = map_xy_np[i].tolist()
            # Decide action
            agent_heading = agent_world_xyt[i, 2].item() - math.pi / 2
            reqd_heading = math.atan2(goal_y - agent_y, goal_x - agent_x)
            diff_angle = reqd_heading - agent_heading
            diff_angle = math.atan2(math.sin(diff_angle), math.cos(diff_angle))
            # Move forward if facing the correct direction
            if abs(diff_angle) < 1.5 * turn_angle:
                gt_actions.append(0)
            # Turn left if the goal is to the left
            elif diff_angle < 0:
                gt_actions.append(1)
            # Turn right otherwise
            else:
                gt_actions.append(2)

        return torch.Tensor(gt_actions).unsqueeze(1).long()
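The action-selection rule at the end of this method compares the required heading with the agent heading after wrapping the difference into (-pi, pi]. The tiny helper below is an illustrative restatement of that logic, not code from the original class:

import math

def heading_to_action(agent_heading, goal_dx, goal_dy, turn_angle):
    """0 = forward, 1 = turn left, 2 = turn right."""
    reqd_heading = math.atan2(goal_dy, goal_dx)
    diff = reqd_heading - agent_heading
    diff = math.atan2(math.sin(diff), math.cos(diff))  # wrap to (-pi, pi]
    if abs(diff) < 1.5 * turn_angle:
        return 0
    return 1 if diff < 0 else 2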
Example 11
    def act(
            self,
            observations,
            prev_observations,
            prev_state_estimates,
            ep_time,
            masks,
            deterministic=False,
    ):
        # ============================ Set useful variables ===========================
        ep_step = ep_time[0].item()
        M = prev_state_estimates["map_states"].shape[2]
        s = self.map_scale
        device = observations['rgb'].device
        assert M % 2 == 1, "The code is tested only for odd map sizes!"
        # =================== Update states from current observation ==================
        # Update map, pose and visitation map
        mapper_inputs = self._create_mapper_inputs(
            observations, prev_observations, prev_state_estimates
        )
        mapper_outputs = self.mapper(mapper_inputs)
        global_map = mapper_outputs["mt"]
        global_pose = mapper_outputs["xt_hat"]
        if self.config.MAPPER.debug_log:
            print(f"Global Pose: {global_pose}")
        map_xy = convert_world2map(global_pose[:, :2], (M, M), s)
        map_xy = torch.clamp(map_xy, 0, M - 1)
        visited_states = self._update_state_visitation(
            prev_state_estimates["visited_states"], map_xy
        )  # (bs, 1, M, M)
        # Update local ANM state variables
        curr_map_position = map_xy
        if ep_step > 0:
            # Compute state updates
            prev_dist2localgoal = self.states["curr_dist2localgoal"]
            curr_dist2localgoal = self._compute_dist2localgoal(
                global_map, map_xy, self.states["curr_local_goals"],
            )
            prev_map_position = self.states["curr_map_position"]
            prev_step_size = (
                    torch.norm(curr_map_position - prev_map_position, dim=1).unsqueeze(1)
                    * s
            )
            # Update the state variables
            self.states["prev_dist2localgoal"] = prev_dist2localgoal
            self.states["curr_dist2localgoal"] = curr_dist2localgoal
            self.states["prev_map_position"] = prev_map_position
            self.states["local_path_length"] += prev_step_size
        self.states["curr_map_position"] = curr_map_position
        # Initialize collision and visited maps at t=0
        if ep_step == 0:
            self.states["collision_map"] = torch.zeros(self.nplanners, M, M).to(device)
            self.states["visited_map"] = torch.zeros(self.nplanners, M, M).to(device)
            self.states["col_width"] = torch.ones(self.nplanners)
            # Monitors number of steps elapsed since last call to sample random explored
            self.states["sample_random_explored_timer"] = torch.zeros(self.nplanners)
        if ep_step > 0:
            # Update collision maps
            forward_step = self.config.LOCAL_POLICY.AGENT_DYNAMICS.forward_step
            for i in range(self.nplanners):
                prev_action_i = observations["prev_actions"][i, 0].item()
                # If not forward action, skip
                if prev_action_i != 0:
                    continue
                x1, y1 = asnumpy(self.states["prev_map_position"][i]).tolist()
                x2, y2 = asnumpy(self.states["curr_map_position"][i]).tolist()
                t2 = global_pose[i, 2].item() - math.pi / 2
                if abs(x1 - x2) < 1 and abs(y1 - y2) < 1:
                    self.states["col_width"][i] += 2
                    self.states["col_width"][i] = min(self.states["col_width"][i], 9)
                else:
                    self.states["col_width"][i] = 1
                dist_trav_i = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) * s
                # Add an obstacle in front of the agent if a collision happens
                if dist_trav_i < 0.7 * forward_step:  # Collision
                    length = 2
                    width = int(self.states["col_width"][i].item())
                    buf = 3
                    cmH, cmW = self.states["collision_map"][i].shape
                    for j in range(length):
                        for k in range(width):
                            wx = (
                                    x2
                                    + ((j + buf) * math.cos(t2))
                                    + ((k - width / 2) * math.sin(t2))
                            )
                            wy = (
                                    y2
                                    + ((j + buf) * math.sin(t2))
                                    - ((k - width / 2) * math.cos(t2))
                            )
                            wx, wy = int(wx), int(wy)
                            if wx < 0 or wx >= cmW or wy < 0 or wy >= cmH:
                                continue
                            self.states["collision_map"][i, wy, wx] = 1
        # Update visitation maps
        for i in range(self.nplanners):
            mx, my = asnumpy(self.states["curr_map_position"][i]).tolist()
            mx, my = int(mx), int(my)
            self.states["visited_map"][i, my - 2: my + 3, mx - 2: mx + 3] = 1
        # ===================== Compute rewards for previous action ===================
        local_rewards = self._compute_local_rewards(ep_step, s).to(device)
        # ====================== Global policy action selection =======================
        SAMPLE_GLOBAL_GOAL_FLAG = ep_step % self.goal_interval == 0
        # Sample global goal if needed
        if SAMPLE_GLOBAL_GOAL_FLAG:
            global_policy_inputs = self._create_global_policy_inputs(
                global_map, visited_states, self.states["curr_map_position"]
            )
            # difference-policy baseline addition
            if hasattr(self.config.GLOBAL_POLICY, "difference_baseline_explorer"):
                if self.config.GLOBAL_POLICY.difference_baseline_explorer:
                    update_dict = {"diff_scores": prev_state_estimates.pop("diff_scores")}
                    global_policy_inputs.update(update_dict)

            (
                global_value,
                global_action,
                global_action_log_probs,
                _,
            ) = self.global_policy.act(global_policy_inputs, None, None, None)
            # Convert action to location (row-major format)
            G = self.global_policy.G
            global_action_map_x = torch.fmod(
                global_action.squeeze(1), G
            ).float()  # (bs, )
            global_action_map_y = (global_action.squeeze(1).float() / G).float()  # (bs, )
            # Convert to MxM map coordinates
            global_action_map_x = global_action_map_x * M / G
            global_action_map_y = global_action_map_y * M / G

            if self.config.fixed_global_goal:
                starting_point = (M - 1) // 2
                global_action_map_x[0] = starting_point + (self.config.fixed_delta_x / s)
                global_action_map_y[0] = starting_point - (self.config.fixed_delta_y / s)

            global_action_map_xy = torch.stack(
                [global_action_map_x, global_action_map_y], dim=1
            )
            # Set the goal (bs, 2) --- (x, y) in map image coordinates
            self.states["curr_global_goals"] = global_action_map_xy
            # Update the current map to prev_map_states in order to facilitate
            # future reward computation.
            self.states["prev_map_states"] = global_map.detach()
        else:
            global_policy_inputs = None
            global_value = None
            global_action = None
            global_action_log_probs = None
        # ======================= Local policy action selection =======================
        # Initialize states at t=0
        if ep_step == 0:
            self.states["curr_local_goals"] = torch.zeros(self.nplanners, 2).to(device)
            self.states["local_shortest_path_length"] = torch.zeros(
                self.nplanners, 1
            ).to(device)
            self.states["local_path_length"] = torch.zeros(self.nplanners, 1).to(device)
        # Should local goals be sampled now?
        if SAMPLE_GLOBAL_GOAL_FLAG:
            if self.config.MAPPER.debug_log:
                print('NEW LOCAL GOAL SAMPLED!')
            # Condition 1: A new global goal was sampled
            SAMPLE_LOCAL_GOAL_FLAGS = [1 for _ in range(self.nplanners)]
        else:
            if self.config.MAPPER.debug_log:
                print('REACHING LOCAL GOAL:')
                print('Distance to Goal: {}'.format(self.states["curr_dist2localgoal"]))
                print('Goal Position: {}'.format(self.states["curr_local_goals"]))
                print('Agent Position: {}'.format(map_xy))
            # Condition 2 (a): The previous local goal was reached
            prev_goal_reached = (
                    self.states["curr_dist2localgoal"] < self.goal_success_radius
            )
            # Condition 2 (b): The previous local goal is occupied.
            goals = self.states["curr_local_goals"].long().to(device)
            prev_gcells = global_map[
                          torch.arange(0, goals.shape[0]).long(), :, goals[:, 1], goals[:, 0]
                          ]
            prev_goal_occupied = (prev_gcells[:, 0] > self.config.thresh_obstacle) & (
                    prev_gcells[:, 1] > self.config.thresh_explored
            )
            SAMPLE_LOCAL_GOAL_FLAGS = asnumpy(
                (prev_goal_reached | prev_goal_occupied).float()
            ).tolist()
        # Execute planner and compute local goals
        self._compute_plans_and_local_goals(
            global_map, self.states["curr_map_position"], SAMPLE_LOCAL_GOAL_FLAGS
        )
        # Update state variables to account for new local goals
        self.states["curr_dist2localgoal"] = self._compute_dist2localgoal(
            global_map,
            self.states["curr_map_position"],
            self.states["curr_local_goals"],
        )

        # Sample action with local policy
        local_masks = 1 - torch.Tensor(SAMPLE_LOCAL_GOAL_FLAGS).to(device).unsqueeze(1)
        recurrent_hidden_states = prev_state_estimates["recurrent_hidden_states"]
        relative_goals = self._compute_relative_local_goals(global_pose, M, s)
        local_policy_inputs = {
            "rgb_at_t": observations["rgb"],
            "goal_at_t": relative_goals,
            "t": ep_time,
        }
        outputs = self.local_policy.act(
            local_policy_inputs,
            recurrent_hidden_states,
            None,
            local_masks,
            deterministic=deterministic,
        )
        (
            local_value,
            local_action,
            local_action_log_probs,
            recurrent_hidden_states,
        ) = outputs
        # If imitation learning is used, also sample the ground-truth action to take
        if "gt_global_map" in observations.keys():
            gt_global_map = observations["gt_global_map"]  # (bs, 2, M, M)
            gt_global_pose = observations["pose_gt"]  # (bs, 3)
            relative_goals_aug = torch.cat(
                [relative_goals, torch.zeros_like(relative_goals[:, 0:1])], dim=1
            )
            gt_goals = add_pose(gt_global_pose, relative_goals_aug)  # (bs, 3)
            gt_actions = self._compute_gt_local_action(
                gt_global_map, gt_global_pose, gt_goals, M, s
            )  # (bs, 1)
            gt_actions = gt_actions.to(device)
        else:
            gt_actions = torch.zeros_like(local_action)
        # ============================== Create output dicts ==========================
        state_estimates = {
            "recurrent_hidden_states": recurrent_hidden_states,
            "map_states": mapper_outputs["mt"],
            "visited_states": visited_states,
            "pose_estimates": global_pose,
        }
        local_policy_outputs = {
            "values": local_value,
            "actions": local_action,
            "action_log_probs": local_action_log_probs,
            "local_masks": local_masks,
            "gt_actions": gt_actions,
        }
        global_policy_outputs = {
            "values": global_value,
            "actions": global_action,
            "action_log_probs": global_action_log_probs,
        }
        rewards = {
            "local_rewards": local_rewards,
        }
        mapper_outputs.update({'curr_map_position': curr_map_position})
        return (
            mapper_inputs,
            local_policy_inputs,
            global_policy_inputs,
            mapper_outputs,
            local_policy_outputs,
            global_policy_outputs,
            state_estimates,
            rewards,
        )
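One detail worth isolating from act() is the collision heuristic used to update the collision map: a forward action that moves the agent by less than 70% of the configured step size is treated as a collision. A hedged scalar restatement (positions in map cells, map_scale converting cells to metres):

import math

def collided(prev_xy, curr_xy, map_scale, forward_step):
    """True if a forward action covered less than 70% of the expected step length."""
    dist = math.hypot(curr_xy[0] - prev_xy[0], curr_xy[1] - prev_xy[1]) * map_scale
    return dist < 0.7 * forward_step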
Example 12
    def _compute_plans(
            self,
            global_map,
            agent_map_xy,
            goal_map_xy,
            sample_goal_flags,
            cache_map=False,
            crop_map_flag=True,
    ):
        """
        global_map - (bs, 2, V, V) tensor
        agent_map_xy - (bs, 2) agent's current position on the map
        goal_map_xy - (bs, 2) goal's current position on the map
        sample_goal_flags - list of zeros and ones indicating whether a new goal should be sampled
        """
        # ==================== Process the map to get planner map =====================
        s = self.map_scale
        # Processed map has zeros for free-space and ones for obstacles
        global_map_proc = self._process_maps(
            global_map, goal_map_xy
        )  # (bs, M, M) tensor
        # =================== Crop a local region around agent, goal ==================
        if crop_map_flag:
            # Determine crop size
            abs_diff_xy = torch.abs(agent_map_xy - goal_map_xy)
            S = int(torch.max(abs_diff_xy).item())
            # Add a small buffer space around the agent location
            buffer_size = int(3.0 / s)  # 3 meters buffer space in total
            S += buffer_size
            old_center_xy = (agent_map_xy + goal_map_xy) / 2
            # Crop a SxS region centered around old_center_xy
            # Note: The out-of-bound regions will be zero-padded by default. In this case,
            # since zeros correspond to free-space, that is not a problem.
            cropped_global_map = crop_map(
                global_map_proc.unsqueeze(1), old_center_xy, S
            ).squeeze(1)  # (bs, S, S)
            # Add zero padding to ensure the plans don't fail due to cropping
            pad_size = 5
            cropped_global_map = F.pad(
                cropped_global_map, (pad_size, pad_size, pad_size, pad_size)
            )
            S += pad_size * 2
            # Transform to new coordinates
            new_center_xy = torch.ones_like(old_center_xy) * (S / 2)
            new_agent_map_xy = agent_map_xy + (new_center_xy - old_center_xy)
            new_goal_map_xy = goal_map_xy + (new_center_xy - old_center_xy)
        else:
            cropped_global_map = global_map_proc  # (bs, M, M)
            old_center_xy = (agent_map_xy + goal_map_xy) / 2
            new_center_xy = old_center_xy
            new_agent_map_xy = agent_map_xy
            new_goal_map_xy = goal_map_xy
            S = cropped_global_map.shape[1]

        if cache_map:
            self._cropped_global_map = cropped_global_map
        # Clip points to ensure they are within map limits
        new_agent_map_xy = torch.clamp(new_agent_map_xy, 0, S - 1)
        new_goal_map_xy = torch.clamp(new_goal_map_xy, 0, S - 1)
        # Convert to numpy
        agent_map_xy_np = asnumpy(new_agent_map_xy).astype(np.int32)
        goal_map_xy_np = asnumpy(new_goal_map_xy).astype(np.int32)
        global_map_np = asnumpy(cropped_global_map)
        # =================== Plan path from agent to goal positions ==================
        plans = self.planner.plan(
            global_map_np, agent_map_xy_np, goal_map_xy_np, sample_goal_flags
        )  # List of tuple of lists
        # Convert plans back to original coordinates
        final_plans = []
        for i in range(len(plans)):
            plan_x, plan_y = plans[i]
            # Planning failure
            if plan_x is None:
                final_plans.append((plan_x, plan_y))
                continue
            offset_x = int((old_center_xy[i, 0] - new_center_xy[i, 0]).item())
            offset_y = int((old_center_xy[i, 1] - new_center_xy[i, 1]).item())
            final_plan_x, final_plan_y = [], []
            for px, py in zip(plan_x, plan_y):
                final_plan_x.append(px + offset_x)
                final_plan_y.append(py + offset_y)
            final_plans.append((final_plan_x, final_plan_y))
        return final_plans
Example 13
    def get_observation(self, *args: Any, observations, episode, **kwargs: Any):
        sim_depth = asnumpy(observations["depth"])
        ego_wall_map_gt = self._get_depth_projection(sim_depth)

        return ego_wall_map_gt
Example 14
    assert caps_norms.max() <= 1.001, 'capsules outputs should be bound by unit norm'
    # the correct capsule is not penalized if its norm exceeds m_plus,
    # while incorrect capsules are not penalized if their norm stays below m_minus
    loss_sig = torch.clamp(m_plus - caps_norms, 0) ** 2
    loss_bkg = torch.clamp(caps_norms - m_minus, 0) ** 2

    loss = target_one_hot * loss_sig + loss_lambda * (1.0 - target_one_hot) * loss_bkg
    return reduce(loss, 'b cls -> b', 'sum')


for epoch in range(100):
    for i, (images, labels) in enumerate(train_loader):
        digit_capsules = encoder(images.to(device))
        labels_one_hot = torch.nn.functional.one_hot(labels, num_classes=10).float().to(device)
        loss = margin_loss(digit_capsules, labels_one_hot).mean()
        reconstructed = decoder(digit_capsules * rearrange(labels_one_hot, 'b caps -> b caps 1'))
        reconstruction_loss_mse = (images.to(device) - reconstructed).pow(2).mean()
        loss += reconstruction_loss_mse * 10.  # pick a weight
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    accuracies = []
    for images, labels in test_loader:
        digit_capsules = encoder(images.to(device)).cpu()
        # predicted capsule is capsule with largest norm
        predicted_labels = digit_capsules.norm(dim=2).argmax(dim=1)
        accuracies += asnumpy(predicted_labels == labels).tolist()

    print(f'epoch {epoch} accuracy: {np.mean(accuracies)}')
def get_episode_map(env: habitat.RLEnv, mapper: Mapper, M: int, config: CN, device: torch.device):
    """Given the environment and the configuration, compute the global
    top-down wall and seen area maps by sampling maps for individual locations
    along a uniform grid in the environment, and registering them.
    """
    # Initialize a global map for the episode
    global_wall_map = torch.zeros(1, 2, M, M).to(device)
    global_seen_map = torch.zeros(1, 2, M, M).to(device)

    grid_size = config.TASK.GT_EGO_MAP.MAP_SCALE
    coordinate_max = maps.COORDINATE_MAX
    coordinate_min = maps.COORDINATE_MIN
    resolution = (coordinate_max - coordinate_min) / grid_size
    grid_resolution = (int(resolution), int(resolution))

    top_down_map = maps.get_topdown_map(
        env.habitat_env.sim, grid_resolution, 20000, draw_border=False,
    )

    map_w, map_h = top_down_map.shape

    intervals = (max(int(0.5 / grid_size), 1), max(int(0.5 / grid_size), 1))
    x_vals = np.arange(0, map_w, intervals[0], dtype=int)
    y_vals = np.arange(0, map_h, intervals[1], dtype=int)
    coors = np.stack(np.meshgrid(x_vals, y_vals), axis=2)  # (H, W, 2)
    coors = coors.reshape(-1, 2)  # (H*W, 2)
    map_vals = top_down_map[coors[:, 0], coors[:, 1]]
    valid_coors = coors[map_vals > 0]

    real_x_vals = coordinate_max - valid_coors[:, 0] * grid_size
    real_z_vals = coordinate_min + valid_coors[:, 1] * grid_size
    start_y = env.habitat_env.sim.get_agent_state().position[1]

    for j in tqdm.tqdm(range(real_x_vals.shape[0]), desc='occupancy map', position=2):
        for theta in np.arange(-np.pi, np.pi, np.pi / 3.0):
            position = [
                real_x_vals[j].item(),
                start_y.item(),
                real_z_vals[j].item(),
            ]
            rotation = [
                0.0,
                np.sin(theta / 2).item(),
                0.0,
                np.cos(theta / 2).item(),
            ]

            sim_obs = env.habitat_env.sim.get_observations_at(
                position, rotation, keep_agent_at_new_pose=True,
            )
            episode = env.habitat_env.current_episode
            obs = env.habitat_env.task.sensor_suite.get_observations(
                observations=sim_obs, episode=episode, task=env.habitat_env.task
            )
            ego_map_gt = torch.Tensor(obs["ego_map_gt"]).to(device)
            ego_map_gt = rearrange(ego_map_gt, "h w c -> () c h w")
            ego_wall_map_gt = torch.Tensor(obs["ego_wall_map_gt"]).to(device)
            ego_wall_map_gt = rearrange(ego_wall_map_gt, "h w c -> () c h w")
            pose_gt = torch.Tensor(obs["pose_gt"]).unsqueeze(0).to(device)
            global_seen_map = mapper.ext_register_map(
                global_seen_map, ego_map_gt, pose_gt
            )
            global_wall_map = mapper.ext_register_map(
                global_wall_map, ego_wall_map_gt, pose_gt
            )

    global_wall_map_np = asnumpy(rearrange(global_wall_map, "b c h w -> b h w c")[0])
    global_seen_map_np = asnumpy(rearrange(global_seen_map, "b c h w -> b h w c")[0])

    return global_seen_map_np, global_wall_map_np
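The pose-sampling pattern used here (and, with a configurable step, in get_eval_map above) is a uniform grid of map cells filtered by the navigable top-down map, crossed with headings every 60 degrees. A NumPy-only sketch of that grid, assuming a precomputed top_down_map array:

import numpy as np

def sample_pose_grid(top_down_map, grid_size, step_m=0.5):
    """Return navigable (row, col) cells on a step_m-spaced grid and the heading set."""
    stride = max(int(step_m / grid_size), 1)
    xs = np.arange(0, top_down_map.shape[0], stride)
    ys = np.arange(0, top_down_map.shape[1], stride)
    coors = np.stack(np.meshgrid(xs, ys), axis=2).reshape(-1, 2)
    valid = coors[top_down_map[coors[:, 0], coors[:, 1]] > 0]
    thetas = np.arange(-np.pi, np.pi, np.pi / 3.0)
    return valid, thetas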
Example 16
    def _eval(self):

        start_time = time.time()

        if self.config.MANUAL_COMMANDS:
            init_time = None
            manual_step_start_time = None
            total_manual_time = 0.0

        checkpoint_index = int(
            (re.findall(r'\d+', self.config.EVAL_CKPT_PATH_DIR))[-1])
        ckpt_dict = torch.load(self.config.EVAL_CKPT_PATH_DIR,
                               map_location="cpu")

        print(
            f'Number of steps of the ckpt: {ckpt_dict["extra_state"]["step"]}')

        config = self._setup_config(ckpt_dict)
        ppo_cfg = config.RL.PPO
        ans_cfg = config.RL.ANS

        self.mapper_rollouts = None
        self._setup_actor_critic_agent(ppo_cfg, ans_cfg)

        self.mapper_agent.load_state_dict(ckpt_dict["mapper_state_dict"])
        if self.local_agent is not None:
            self.local_agent.load_state_dict(ckpt_dict["local_state_dict"])
            self.local_actor_critic = self.local_agent.actor_critic
        else:
            self.local_actor_critic = self.ans_net.local_policy
        self.global_agent.load_state_dict(ckpt_dict["global_state_dict"])
        self.mapper = self.mapper_agent.mapper
        self.global_actor_critic = self.global_agent.actor_critic

        # Set models to evaluation
        self.mapper.eval()
        self.local_actor_critic.eval()
        self.global_actor_critic.eval()

        M = ans_cfg.overall_map_size
        V = ans_cfg.MAPPER.map_size
        s = ans_cfg.MAPPER.map_scale
        imH, imW = ans_cfg.image_scale_hw

        num_steps = self.config.T_EXP

        prev_action = torch.zeros(1, 1, device=self.device, dtype=torch.long)
        masks = torch.zeros(1, 1, device=self.device)

        try:
            self.sim = make_sim('PyRobot-v1',
                                config=self.config.TASK_CONFIG.PYROBOT)
        except (KeyboardInterrupt, SystemExit):
            sys.exit()

        pose = defaultdict()
        self.sim._robot.camera.set_tilt(math.radians(self.config.CAMERA_TILT),
                                        wait=True)
        print(
            f"\nStarting Camera State: {self.sim.get_agent_state()['camera']}")
        print(f"Starting Agent State: {self.sim.get_agent_state()['base']}")
        obs = [self.sim.reset()]

        if self.config.SAVE_OBS_IMGS:
            cv2.imwrite(f'obs/depth_dirty_s.jpg', obs[0]['depth'] * 255.0)

        obs[0]['depth'][..., 0] = self._correct_depth(obs, -1)

        if self.config.SAVE_OBS_IMGS:
            cv2.imwrite(f'obs/rgb_s.jpg', obs[0]['rgb'][:, :, ::-1])
            cv2.imwrite(f'depth_s.jpg', obs[0]['depth'] * 255.0)

        starting_agent_state = self.sim.get_agent_state()
        locobot2relative = CoordProjection(starting_agent_state['base'])
        pose['base'] = locobot2relative(starting_agent_state['base'])

        print(f"Starting Agent Pose: {pose['base']}\n")
        batch = self._prepare_batch(obs, -1, device=self.device)
        if ans_cfg.MAPPER.use_sensor_positioning:
            batch['pose'] = pose['base'].to(self.device)
            batch['pose'][0][1:] = -batch['pose'][0][1:]
        prev_batch = batch

        num_envs = self.config.NUM_PROCESSES
        agent_poses_over_time = []
        for i in range(num_envs):
            agent_poses_over_time.append(
                torch.tensor([(M - 1) / 2, (M - 1) / 2, 0]))
        state_estimates = {
            "pose_estimates": torch.zeros(num_envs, 3).to(self.device),
            "map_states": torch.zeros(num_envs, 2, M, M).to(self.device),
            "recurrent_hidden_states": torch.zeros(
                1, num_envs, ans_cfg.LOCAL_POLICY.hidden_size).to(self.device),
            "visited_states": torch.zeros(num_envs, 1, M, M).to(self.device),
        }
        ground_truth_states = {
            "visible_occupancy": torch.zeros(num_envs, 2, M, M).to(self.device),
            "pose": torch.zeros(num_envs, 3).to(self.device),
            "environment_layout": torch.zeros(num_envs, 2, M, M).to(self.device),
        }

        # Reset ANS states
        self.ans_net.reset()

        # Frames for video creation
        rgb_frames = []
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        step_start_time = time.time()

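        # Main control loop: one ANS planning/acting step per iteration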
        for i in range(num_steps):
            print(
                f"\n\n---------------------------------------------------<<< STEP {i} >>>---------------------------------------------------"
            )
            ep_time = torch.zeros(num_envs, 1, device=self.device).fill_(i)

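            # Run one step of the ANS hierarchy: update the map, refresh goals, and
            # select a local action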
            (
                mapper_inputs,
                local_policy_inputs,
                global_policy_inputs,
                mapper_outputs,
                local_policy_outputs,
                global_policy_outputs,
                state_estimates,
                intrinsic_rewards,
            ) = self.ans_net.act(
                batch,
                prev_batch,
                state_estimates,
                ep_time,
                masks,
                deterministic=True,
            )
            if self.config.SAVE_MAP_IMGS:
                cv2.imwrite(
                    f'maps/test_map_{i - 1}.jpg',
                    self._round_map(state_estimates['map_states']) * 255)

            action = local_policy_outputs["actions"][0][0]

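            # Metric distance between the current map position and the active global
            # goal (map cells converted to meters via the map scale s)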
            distance2ggoal = torch.norm(
                mapper_outputs['curr_map_position'] -
                self.ans_net.states["curr_global_goals"],
                dim=1) * s

            print(f"Distance to Global Goal: {distance2ggoal}")

            reached_flag = distance2ggoal < ans_cfg.goal_success_radius

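            # Manual mode: read the next action index from stdin and track the time
            # spent waiting for user input; entering 3 marks the goal as reached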
            if self.config.MANUAL_COMMANDS:
                if init_time is None:
                    init_time = time.time() - start_time
                    total_manual_time = total_manual_time + init_time
                if manual_step_start_time is not None:
                    manual_step_time = time.time() - manual_step_start_time
                    total_manual_time = total_manual_time + manual_step_time
                action = torch.tensor(
                    int(input('Waiting for input to start a new action: ')))
                manual_step_start_time = time.time()

                if action.item() == 3:
                    reached_flag = True

            prev_action.copy_(action)

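            # Execute the selected action on the robot unless the goal has been reached
            # or action 3 (treated as a stop) was chosen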
            if not reached_flag and action.item() != 3:
                print(f'Doing Env Step [{self.ACT_2_NAME[action.item()]}]...')
                action_command = self.ACT_2_COMMAND[action.item()]

                obs = self._do_action(action_command)

                if self.config.SAVE_OBS_IMGS:
                    cv2.imwrite(f'obs/depth_dirty_{i}.jpg',
                                obs[0]['depth'] * 255.0)

                # Correcting invalid depth pixels
                obs[0]['depth'][..., 0] = self._correct_depth(obs, i)

                if self.config.SAVE_OBS_IMGS:
                    cv2.imwrite(f'obs/rgb_{i}.jpg', obs[0]['rgb'][:, :, ::-1])
                    cv2.imwrite(f'obs/depth_{i}.jpg', obs[0]['depth'] * 255.0)

                agent_state = self.sim.get_agent_state()
                prev_batch = batch
                batch = self._prepare_batch(obs, i, device=self.device)

                pose = defaultdict()
                pose['base'] = locobot2relative(agent_state['base'])

                if ans_cfg.MAPPER.use_sensor_positioning:
                    batch['pose'] = pose['base'].to(self.device)
                    batch['pose'][0][1:] = -batch['pose'][0][1:]

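                # Convert the world-frame pose to map-cell coordinates and append the
                # heading, for logging and visualization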
                map_coords = convert_world2map(
                    batch['pose'], (M, M),
                    ans_cfg.OCCUPANCY_ANTICIPATOR.EGO_PROJECTION.map_scale
                ).squeeze()
                map_coords = torch.cat(
                    (map_coords, batch['pose'][0][-1].reshape(1)))
                if self.config.COORD_DEBUG:
                    print('COORDINATES CHECK')
                    print(
                        f'Starting Agent State: {starting_agent_state["base"]}'
                    )
                    print(f'Current Agent State: {agent_state["base"]}')
                    print(
                        f'Current Sim Agent State: {self.sim.get_agent_state()["base"]}'
                    )
                    print(f'Current Global Coords: {batch["pose"]}')
                    print(f'Current Map Coords: {map_coords}')
                agent_poses_over_time.append(map_coords)

                step_time = time.time() - step_start_time
                print(f"\nStep Time: {step_time}")
                step_start_time = time.time()

            # Create new frame of the video
            if len(self.config.VIDEO_OPTION) > 0:
                frame = observations_to_image(
                    obs[0],
                    observation_size=300,
                    collision_flag=self.config.DRAW_COLLISIONS)
                # Add ego_map_gt to frame
                ego_map_gt_i = asnumpy(batch["ego_map_gt"][0])  # (2, H, W)
                ego_map_gt_i = convert_gt2channel_to_gtrgb(ego_map_gt_i)
                ego_map_gt_i = cv2.resize(ego_map_gt_i, (300, 300))
                # frame = np.concatenate([frame], axis=1)
                # Generate ANS specific visualizations
                environment_layout = asnumpy(
                    ground_truth_states["environment_layout"][0])  # (2, H, W)
                visible_occupancy = mapper_outputs["gt_mt"][0].cpu().numpy()  # (2, H, W)
                anticipated_occupancy = mapper_outputs["hat_mt"][0].cpu().numpy()  # (2, H, W)

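                # Render allocentric top-down visualizations of the visible and
                # anticipated occupancy maps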
                H = frame.shape[0]
                visible_occupancy_vis = generate_topdown_allocentric_map(
                    environment_layout,
                    visible_occupancy,
                    agent_poses_over_time,
                    thresh_explored=ans_cfg.thresh_explored,
                    thresh_obstacle=ans_cfg.thresh_obstacle,
                    zoom=False)
                visible_occupancy_vis = cv2.resize(visible_occupancy_vis,
                                                   (H, H))
                anticipated_occupancy_vis = generate_topdown_allocentric_map(
                    environment_layout,
                    anticipated_occupancy,
                    agent_poses_over_time,
                    thresh_explored=ans_cfg.thresh_explored,
                    thresh_obstacle=ans_cfg.thresh_obstacle,
                    zoom=False)
                anticipated_occupancy_vis = cv2.resize(
                    anticipated_occupancy_vis, (H, H))
                anticipated_action_map = generate_topdown_allocentric_map(
                    environment_layout,
                    anticipated_occupancy,
                    agent_poses_over_time,
                    zoom=False,
                    thresh_explored=ans_cfg.thresh_explored,
                    thresh_obstacle=ans_cfg.thresh_obstacle,
                )
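                # Mark the current global and local goals on the anticipated action map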
                global_goals = self.ans_net.states["curr_global_goals"]
                local_goals = self.ans_net.states["curr_local_goals"]
                if global_goals is not None:
                    cX = int(global_goals[0, 0].item())
                    cY = int(global_goals[0, 1].item())
                    anticipated_action_map = cv2.circle(
                        anticipated_action_map,
                        (cX, cY),
                        10,
                        (255, 0, 0),
                        -1,
                    )
                if local_goals is not None:
                    cX = int(local_goals[0, 0].item())
                    cY = int(local_goals[0, 1].item())
                    anticipated_action_map = cv2.circle(
                        anticipated_action_map,
                        (cX, cY),
                        10,
                        (0, 255, 255),
                        -1,
                    )
                anticipated_action_map = cv2.resize(anticipated_action_map,
                                                    (H, H))

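                # Stack the four map visualizations horizontally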
                maps_vis = np.concatenate(
                    [
                        visible_occupancy_vis, anticipated_occupancy_vis,
                        anticipated_action_map, ego_map_gt_i
                    ],
                    axis=1,
                )

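                # For the larger map configurations, pad whichever image is narrower so
                # the widths match before vertical stacking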
                if self.config.RL.ANS.overall_map_size in (961, 2001):
                    if frame.shape[1] < maps_vis.shape[1]:
                        diff = maps_vis.shape[1] - frame.shape[1]
                        npad = ((0, 0), (diff // 2, diff - diff // 2), (0, 0))
                        frame = np.pad(frame,
                                       pad_width=npad,
                                       mode='constant',
                                       constant_values=0)
                    elif frame.shape[1] > maps_vis.shape[1]:
                        diff = frame.shape[1] - maps_vis.shape[1]
                        npad = ((0, 0), (diff // 2, diff - diff // 2), (0, 0))
                        maps_vis = np.pad(maps_vis,
                                          pad_width=npad,
                                          mode='constant',
                                          constant_values=0)
                frame = np.concatenate([frame, maps_vis], axis=0)
                rgb_frames.append(frame)
                if self.config.SAVE_VIDEO_IMGS:
                    os.makedirs("fig1", exist_ok=True)
                    print("Saving imgs for Fig. 1...")
                    cv2.imwrite(f'fig1/rgb_{step_start_time}.jpg',
                                obs[0]['rgb'][:, :, ::-1])
                    cv2.imwrite(f'fig1/depth_{step_start_time}.jpg',
                                obs[0]['depth'] * 255.0)
                    cv2.imwrite(f'fig1/aap_{step_start_time}.jpg',
                                anticipated_action_map[..., ::-1])
                    cv2.imwrite(f'fig1/em_{step_start_time}.jpg',
                                ego_map_gt_i[..., ::-1])
                if self.config.DEBUG_VIDEO_FRAME:
                    cv2.imwrite('last_frame.jpg', frame)

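                # Hold the final frame for a short pause in the video once the goal is reached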
                if reached_flag:
                    for _ in range(20):
                        rgb_frames.append(frame)

                # Video creation
                video_dict = {"t": start_time}
                if (i + 1) % 10 == 0 or reached_flag:
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames,
                        episode_id=0,
                        checkpoint_idx=checkpoint_index,
                        metrics=video_dict,
                        tb_writer=TensorboardWriter('tb/locobot'),
                    )

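            # On goal completion: report timing, step count, final distance, and any
            # collision, then stop the loop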
            if reached_flag:
                if self.config.MANUAL_COMMANDS:
                    manual_step_time = time.time() - manual_step_start_time
                    total_manual_time = total_manual_time + manual_step_time
                    print(f"Manual elapsed time: {total_manual_time}")

                print(f"Number of steps: {i + 1}")
                print(f"Elapsed time: {time.time() - start_time}")
                print(f"Final Distance to Goal: {distance2ggoal}")
                if "bump" in obs[0]:
                    print(f"Collision: {obs[0]['bump']}")
                print("Exiting...")
                break
        return