Example #1
# Assumed third-party imports for this wrapper (a sketch; adjust to your
# package layout): numpy, OpenCV, gym's seeding helper and scipy's truncated
# normal are used directly below.  objrender, Environment, get_configs,
# load_config, gen_point_cloud, RENDERING_GPU and HEIGHT_THRESHOLD are
# House3D / project-local names assumed to be provided elsewhere.
import cv2
import numpy as np
from gym.utils import seeding
from scipy.stats import truncnorm


class House3DRGBD:
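    """RGB-D exploration environment built on the House3D renderer.

    Observations are a resized RGB frame plus two egocentric top-down
    occupancy maps; the six discrete actions are move forward/backward,
    turn left/right and strafe left/right; the reward is the newly seen
    floor area minus collision and per-step penalties.
    """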
    def __init__(self,
                 train_mode=True,
                 area_reward_scale=1,
                 collision_penalty=0.1,
                 step_penalty=0.0005,
                 max_depth=3.0,
                 render_door=False,
                 start_indoor=False,
                 ignore_collision=False,
                 ob_dilation_kernel=5,
                 large_map_size=80):
        self.seed()
        self.configs = get_configs()
        self.configs['large_map_size'] = large_map_size
        self.env = None

        self.train_mode = train_mode
        self.render_door = render_door
        self.ignore_collision = ignore_collision
        self.start_indoor = start_indoor
        self.render_height = self.configs['render_height']
        self.render_width = self.configs['render_width']
        self.img_height = self.configs['output_height']
        self.img_width = self.configs['output_width']
        self.ob_dilation_kernel = ob_dilation_kernel
        self.config = load_config(self.configs['path'],
                                  prefix=self.configs['par_path'])
        self.move_sensitivity = self.configs['move_sensitivity']
        self.rot_sensitivity = self.configs['rot_sensitivity']
        self.train_houses = self.configs['train_houses']
        self.test_houses = self.configs['test_houses']

        if train_mode:
            self.houses_id = self.train_houses
            # print("Number of training houses:", len(self.houses_id))
        else:
            # In evaluation mode a house_id must be passed to reset() explicitly.
            self.houses_id = None  # self.test_houses
        self.depth_threshold = (0, max_depth)
        self.area_reward_scale = area_reward_scale
        self.collision_penalty = collision_penalty
        self.step_penalty = step_penalty
        self.observation_space = [self.img_width, self.img_height, 3]
        self.action_space = [6]

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self, house_id=None, x=None, y=None, yaw=None):
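        """Sample a house (a random training house if `house_id` is None),
        rebuild the renderer and environment, reset the coverage grid, and
        return the initial observation tuple
        (resized RGB, large egocentric map, small egocentric map)."""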
        if house_id is None:
            house_id = self.np_random.choice(self.houses_id, 1)[0]
        self.hid = house_id
        if self.env is not None:
            del self.api
            del self.env
        self.api = objrender.RenderAPI(self.render_width,
                                       self.render_height,
                                       device=RENDERING_GPU)

        self.env = Environment(self.api,
                               house_id,
                               self.config,
                               GridDet=self.configs['GridDet'],
                               RenderDoor=self.render_door,
                               StartIndoor=self.start_indoor)

        if not self.train_mode:
            self.loc_map = self.env.gen_locmap()
            obs_map = self.env.house.obsMap.T
            self.obs_pos = obs_map == 1
            self.traj = []
            self.traj_actions = []
            self.grid_traj = []

        self.L_min = self.env.house.L_lo
        self.L_max = self.env.house.L_hi
        self.grid_size = self.env.house.grid_det
        grid_num = np.array(
            [self.env.house.n_row[0] + 1, self.env.house.n_row[1] + 1])
        self.grids_mat = np.zeros(tuple(grid_num), dtype=np.uint8)
        self.max_grid_size = np.max(grid_num)
        self.max_seen_area = float(np.prod(grid_num))
        self.env.reset(x=x, y=y, yaw=yaw)
        self.start_pos, self.grid_start_pos = self.get_camera_grid_pos()
        if not self.train_mode:
            print('start pose: ', self.start_pos)
            self.traj.append(self.start_pos.tolist())
            self.grid_traj.append(self.grid_start_pos.tolist())
        rgb, depth, extrinsics = self.get_obs()
        large_loc_map, small_loc_map = self.get_loc_map()
        self.seen_area = self.get_seen_area(rgb, depth, extrinsics,
                                            self.grids_mat)

        self.ep_len = 0
        self.ep_reward = 0
        self.collision_times = 0
        ob = (self.resize_img(rgb), large_loc_map, small_loc_map)
        return ob

    def step(self, action):
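        """Apply one discrete action, update the coverage grid and return
        (observation, reward, done, info).  `done` is always False here;
        episode termination is left to the caller."""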
        if self.ignore_collision:
            collision_flag = self.motion_primitive_no_check(action)
        else:
            collision_flag = self.motion_primitive(action)
        rgb, depth, extrinsics = self.get_obs()
        large_loc_map, small_loc_map = self.get_loc_map()
        if not self.train_mode:
            current_pos, grid_current_pos = self.get_camera_grid_pos()
            self.traj_actions.append(int(action))
            self.traj.append(current_pos.tolist())
            self.grid_traj.append(grid_current_pos.tolist())

        reward, seen_area, raw_reward = self.cal_reward(
            rgb, depth, extrinsics, collision_flag)
        self.ep_len += 1
        self.ep_reward += reward
        if collision_flag:
            self.collision_times += 1
        info = {
            'reward_so_far': self.ep_reward,
            'steps_so_far': self.ep_len,
            'seen_area': seen_area,
            'collisions': self.collision_times,
            'start_pose': self.start_pos,
            'house_id': self.hid,
            'collision_flag': collision_flag
        }
        info = {**info, **raw_reward}
        done = False
        ob = (self.resize_img(rgb), large_loc_map, small_loc_map)
        return ob, reward, done, info

    def render(self):
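        """Show the current RGB frame next to the seen-area map in an
        OpenCV window (evaluation / debugging aid)."""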
        loc_map, small_map = self.get_loc_map()

        if not self.train_mode:
            rad = self.env.house.robotRad / self.env.house.grid_det
            x = int(loc_map.shape[0] / 2)
            y = int(loc_map.shape[1] / 2)
            cv2.circle(loc_map, (x, y), 1, (255, 0, 255), thickness=-1)
            x = int(small_map.shape[0] / 2)
            y = int(small_map.shape[1] / 2)
            cv2.circle(small_map, (x, y),
                       int(rad) * 2, (255, 0, 255),
                       thickness=-1)

        loc_map = cv2.resize(loc_map, (self.render_width, self.render_height),
                             interpolation=cv2.INTER_CUBIC)
        if not self.train_mode:
            x = int(loc_map.shape[0] / 2)
            y = int(loc_map.shape[1] / 2)
            cv2.circle(loc_map, (x, y), 2, (255, 0, 255), thickness=-1)
        self.env.set_render_mode('rgb')
        rgb = self.env.render()

        img = np.concatenate((rgb, loc_map), axis=1)
        img = img[:, :, ::-1]  # RGB -> BGR for OpenCV display
        img = cv2.resize(img, (img.shape[1] * 3, img.shape[0] * 3),
                         interpolation=cv2.INTER_CUBIC)
        cv2.imshow("nav", img)
        cv2.waitKey(40)

    def resize_img(self, img):
        return cv2.resize(img, (self.img_width, self.img_height),
                          interpolation=cv2.INTER_AREA)

    def motion_primitive(self, action):
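        """Execute one discrete action with collision checking; returns True
        when the requested translation was blocked by an obstacle."""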
        # 0: Forward
        # 1: Turn Left
        # 2: Turn Right
        # 3: Strafe Left
        # 4: Strafe Right
        # 5: Backward
        collision_flag = False
        if action == 0:
            if not self.train_mode:
                print('Action: Forward')
            if not self.env.move_forward(self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot move forward, collision!!!')
                collision_flag = True
        elif action == 1:
            if not self.train_mode:
                print('Action: Turn Left')
            self.env.rotate(-self.rot_sensitivity)
        elif action == 2:
            if not self.train_mode:
                print('Action: Turn Right')
            self.env.rotate(self.rot_sensitivity)
        elif action == 3:
            if not self.train_mode:
                print('Action: Strafe Left')
            if not self.env.move_forward(dist_fwd=0,
                                         dist_hor=-self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot strafe left, collision!!!')
                collision_flag = True
        elif action == 4:
            if not self.train_mode:
                print('Action: Strafe Right')
            if not self.env.move_forward(dist_fwd=0,
                                         dist_hor=self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot strafe right, collision!!!')
                collision_flag = True
        elif action == 5:
            if not self.train_mode:
                print('Action: Backward')
            if not self.env.move_forward(-self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot move backward, collision!!!')
                collision_flag = True
        else:
            raise ValueError('unknown action type: [{0:d}]'.format(action))
        return collision_flag

    def move_forward(self, dist_fwd, dist_hor=0):
        """
        Move with `fwd` distance to the front and `hor` distance to the right.
        Both distance are float numbers.
        Ignore collision !!!
        """
        pos = self.env.cam.pos
        pos = pos + self.env.cam.front * dist_fwd
        pos = pos + self.env.cam.right * dist_hor
        self.env.cam.pos.x = pos.x
        self.env.cam.pos.z = pos.z

    def motion_primitive_no_check(self, action):
        # motion primitive without collision checking
        # 0: Forward
        # 1: Turn Left
        # 2: Turn Right
        # 3: Strafe Left
        # 4: Strafe Right
        # 5: Backward
        collision_flag = False
        if action == 0:
            self.move_forward(self.move_sensitivity)
        elif action == 1:
            self.env.rotate(-self.rot_sensitivity)
        elif action == 2:
            self.env.rotate(self.rot_sensitivity)
        elif action == 3:
            self.move_forward(dist_fwd=0, dist_hor=-self.move_sensitivity)
        elif action == 4:
            self.move_forward(dist_fwd=0, dist_hor=self.move_sensitivity)
        elif action == 5:
            self.move_forward(-self.move_sensitivity)
        else:
            raise ValueError('unknown action type: [{0:d}]'.format(action))
        return collision_flag

    def get_obs(self):
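        """Render the current view.  Returns the RGB frame, a depth map with
        infinite pixels (flagged in channel 1) zeroed out and the 8-bit depth
        rescaled to [0, 20], and the camera extrinsics."""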
        self.env.set_render_mode('rgb')
        rgb = self.env.render()
        self.env.set_render_mode('depth')
        depth = self.env.render()
        infmask = depth[:, :, 1]
        depth = depth[:, :, 0] * (infmask == 0)
        true_depth = depth.astype(np.float32) / 255.0 * 20.0
        extrinsics = self.env.cam.getExtrinsicsNumpy()
        return rgb, true_depth, extrinsics

    def get_seen_area(self, rgb, depth, extrinsics, out_mat, inv_E=True):
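        """Back-project the RGB-D frame into a point cloud, bin the points
        into the house-level 2-D grid, mark seen free cells (2) and obstacle
        cells (1) in `out_mat`, and return the total number of seen cells."""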
        points, points_colors = gen_point_cloud(
            depth,
            rgb,
            extrinsics,
            depth_threshold=self.depth_threshold,
            inv_E=inv_E)
        grid_locs = np.floor(
            (points[:, [0, 2]] - self.L_min) / self.grid_size).astype(int)
        grids_mat = np.zeros(
            (self.grids_mat.shape[0], self.grids_mat.shape[1]), dtype=np.uint8)

        high_filter_idx = points[:, 1] < HEIGHT_THRESHOLD[1]
        low_filter_idx = points[:, 1] > HEIGHT_THRESHOLD[0]
        obstacle_idx = np.logical_and(high_filter_idx, low_filter_idx)

        self.safe_assign(grids_mat, grid_locs[high_filter_idx, 0],
                         grid_locs[high_filter_idx, 1], 2)
        kernel = np.ones((3, 3), np.uint8)
        grids_mat = cv2.morphologyEx(grids_mat, cv2.MORPH_CLOSE, kernel)

        obs_mat = np.zeros((self.grids_mat.shape[0], self.grids_mat.shape[1]),
                           dtype=np.uint8)
        self.safe_assign(obs_mat, grid_locs[obstacle_idx, 0],
                         grid_locs[obstacle_idx, 1], 1)
        kernel = np.ones((self.ob_dilation_kernel, self.ob_dilation_kernel),
                         np.uint8)
        obs_mat = cv2.morphologyEx(obs_mat, cv2.MORPH_CLOSE, kernel)
        obs_idx = np.where(obs_mat == 1)
        self.safe_assign(grids_mat, obs_idx[0], obs_idx[1], 1)
        out_mat[np.where(grids_mat == 2)] = 2
        out_mat[np.where(grids_mat == 1)] = 1
        seen_area = np.sum(out_mat > 0)
        return seen_area

    def cal_reward(self, rgb, depth, extrinsics, collision_flag):
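        """Reward = newly seen grid cells * area_reward_scale, minus the
        collision penalty when the move was blocked and a constant
        per-step penalty."""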
        filled_grid_num = self.get_seen_area(rgb,
                                             depth,
                                             extrinsics,
                                             self.grids_mat,
                                             inv_E=True)
        area_reward = (filled_grid_num - self.seen_area)
        reward = area_reward * self.area_reward_scale
        if collision_flag:
            reward -= self.collision_penalty
        reward -= self.step_penalty
        self.seen_area = filled_grid_num
        raw_reward = {'area': area_reward, 'collision_flag': collision_flag}
        return reward, filled_grid_num, raw_reward

    def get_loc_map(self):
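        """Build egocentric top-down maps: embed the coverage grid in a white
        canvas centred on the agent, rotate it to align with the agent's
        heading, and return a resized large-range crop plus a small-range
        crop."""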
        top_down_map = self.grids_mat.T.copy()

        half_size = max(top_down_map.shape[0], top_down_map.shape[1],
                        self.configs['large_map_range']) * 3
        ego_map = np.ones(
            (half_size * 2, half_size * 2, 3), dtype=np.uint8) * 255
        loc_map = np.zeros((top_down_map.shape[0], top_down_map.shape[1], 3),
                           dtype=np.uint8)
        loc_map[top_down_map == 0] = np.array([255, 255, 255])
        loc_map[top_down_map == 1] = np.array([0, 0, 255])
        loc_map[top_down_map == 2] = np.array([0, 255, 0])
        current_pos, grid_current_pos = self.get_camera_grid_pos()
        x_start = half_size - grid_current_pos[1]
        y_start = half_size - grid_current_pos[0]
        x_end = x_start + top_down_map.shape[0]
        y_end = y_start + top_down_map.shape[1]
        assert x_start >= 0 and y_start >= 0 and \
               x_end <= ego_map.shape[0] and y_end <= ego_map.shape[1]
        ego_map[x_start:x_end, y_start:y_end] = loc_map
        center = (half_size, half_size)
        rot_angle = self.constrain_to_pm_pi(90 + current_pos[2])
        M = cv2.getRotationMatrix2D(center, rot_angle, 1.0)
        ego_map = cv2.warpAffine(ego_map,
                                 M, (ego_map.shape[1], ego_map.shape[0]),
                                 flags=cv2.INTER_AREA,
                                 borderMode=cv2.BORDER_CONSTANT,
                                 borderValue=(255, 255, 255))
        start = half_size - self.configs['small_map_range']
        end = half_size + self.configs['small_map_range']
        small_ego_map = ego_map[start:end, start:end]

        start = half_size - self.configs['large_map_range']
        end = half_size + self.configs['large_map_range']
        assert start >= 0
        assert end <= ego_map.shape[0]
        large_ego_map = ego_map[start:end, start:end]
        return cv2.resize(
            large_ego_map,
            (self.configs['large_map_size'], self.configs['large_map_size']),
            interpolation=cv2.INTER_AREA), small_ego_map

    def safe_assign(self, im_map, x_idx, y_idx, value):
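        """Assign `value` at the given indices, silently dropping any indices
        that fall outside `im_map`."""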
        try:
            im_map[x_idx, y_idx] = value
        except IndexError:
            valid_idx1 = np.logical_and(x_idx >= 0, x_idx < im_map.shape[0])
            valid_idx2 = np.logical_and(y_idx >= 0, y_idx < im_map.shape[1])
            valid_idx = np.logical_and(valid_idx1, valid_idx2)
            im_map[x_idx[valid_idx], y_idx[valid_idx]] = value

    def constrain_to_pm_pi(self, theta):
        # make sure theta is within [-180, 180)
        return (theta + 180) % 360 - 180

    def get_camera_grid_pos(self):
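        """Return the camera pose (x, z, yaw in [-180, 180)) and the
        corresponding cell in the house grid."""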
        current_pos = np.array([
            self.env.cam.pos.x, self.env.cam.pos.z,
            self.constrain_to_pm_pi(self.env.cam.yaw)
        ])
        grid_pos = np.array(
            self.env.house.to_grid(current_pos[0], current_pos[1]))
        return current_pos, grid_pos

    def truncated_norm(self, mu, sigma, lower_limit, upper_limit, size):
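        """Sample `size` values from a normal(mu, sigma) truncated to
        [lower_limit, upper_limit]; returns mu directly when sigma == 0."""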
        if sigma == 0:
            return mu
        lower_limit = (lower_limit - mu) / sigma
        upper_limit = (upper_limit - mu) / sigma
        r = truncnorm(lower_limit, upper_limit, loc=mu, scale=sigma)
        return r.rvs(size)
Example #2
# Assumed imports for this demo-replay script (a sketch; adjust to your
# package layout): argparse, json, numpy, OpenCV and tqdm are used directly
# below.  objrender, Environment, RenderMode, load_config, gen_point_cloud,
# FONT, USE_OPEN3D and the Open3D helpers (PointCloud, Vector3dVector,
# create_mesh_coordinate_frame, draw_geometries) are assumed to come from
# House3D / the surrounding project / an older Open3D API.
import argparse
import json

import cv2
import numpy as np
from tqdm import tqdm


def main():
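    """Replay one recorded human demonstration: re-render each logged pose,
    accumulate a coloured voxel grid from the depth frames, display the RGB /
    semantic / depth / top-down views with the question-answer text, then save
    the final seen-area map and optionally show the point cloud with Open3D."""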
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file',
                        default='../path_data/cleaned_human_demo_data.json',
                        type=str)
    parser.add_argument('--demo_id', default=0, type=int)
    parser.add_argument('--width', type=int, default=600)
    parser.add_argument('--height', type=int, default=450)
    parser.add_argument('--filter_height', type=bool, default=True)
    args = parser.parse_args()
    with open(args.data_file, 'r') as f:
        data = json.load(f)
    demo = data[args.demo_id]
    print('Total time steps:', len(demo['loc']))
    locs = np.array(demo['loc'][1:])
    ques = demo['question']
    answer = demo['answer']
    text = 'Q: {0:s}   A: {1:s}'.format(ques, answer)
    text_height = 60

    cfg = load_config('config.json')
    api = objrender.RenderAPI(w=args.width, h=args.height, device=0)
    env = Environment(api, demo['house_id'], cfg)

    # Compose [x, y, z] bounds for the voxel grid from the house's 2-D (x, z)
    # bounds and the y range of its 3-D bounds.
    L_min = env.house.L_min_coor
    L_max = env.house.L_max_coor
    L_min = np.array([[env.house.L_lo[0], L_min[1], env.house.L_lo[1]]])
    L_max = np.array([[env.house.L_hi[0], L_max[1], env.house.L_hi[1]]])
    grid_size = env.house.grid_det
    n_row = env.house.n_row
    grid_num = np.array([
        n_row[0] + 1,
        int((L_max[0][1] - L_min[0][1]) / (grid_size + 1e-8)) + 1,
        n_row[1] + 1
    ])
    print('Grid size:', grid_size)
    print('Number of grid in [x, y, z]:', grid_num)

    all_grids = np.zeros(tuple(grid_num), dtype=bool)
    grid_colors = np.zeros(tuple(grid_num) + (3, ), dtype=np.uint8)
    loc_map = env.gen_locmap()
    obs_map = env.house.obsMap.T
    obs_pos = obs_map == 1
    for t in tqdm(range(len(locs))):
        env.reset(x=locs[t][0], y=locs[t][2], yaw=locs[t][3])
        depth = env.render(RenderMode.DEPTH)
        rgb = env.render(RenderMode.RGB)
        semantic = env.render(RenderMode.SEMANTIC)
        infmask = depth[:, :, 1]
        depth = depth[:, :, 0] * (infmask == 0)
        true_depth = depth.astype(np.float32) / 255.0 * 20.0
        extrinsics = env.cam.getExtrinsicsNumpy()
        points, points_colors = gen_point_cloud(true_depth, rgb, extrinsics)
        grid_locs = np.floor((points - L_min) / grid_size).astype(int)
        all_grids[grid_locs[:, 0], grid_locs[:, 1], grid_locs[:, 2]] = True
        grid_colors[grid_locs[:, 0], grid_locs[:, 1],
                    grid_locs[:, 2]] = points_colors
        depth = np.stack([depth] * 3, axis=2)

        loc_map[grid_locs[:, 2], grid_locs[:, 0], :] = np.array([250, 120, 120])
        loc_map[obs_pos] = 0
        rad = env.house.robotRad / env.house.grid_det
        x, y = env.cam.pos.x, env.cam.pos.z
        x, y = env.house.to_grid(x, y)
        loc_map_cp = loc_map.copy()
        cv2.circle(loc_map_cp, (x, y), int(rad), (0, 0, 255), thickness=-1)
        loc_map_resized = cv2.resize(loc_map_cp, env.resolution)
        concat1 = np.concatenate((rgb, semantic), axis=1)
        concat2 = np.concatenate((depth, loc_map_resized), axis=1)
        ret = np.concatenate((concat1, concat2), axis=0)
        ret = ret[:, :, ::-1]  # RGB -> BGR for OpenCV display
        pad_text = np.ones((text_height, ret.shape[1], ret.shape[2]),
                           dtype=np.uint8)
        mat_w_text = np.concatenate((np.copy(ret), pad_text), axis=0)
        cv2.putText(mat_w_text, text, (10, mat_w_text.shape[0] - 10), FONT, 1,
                    (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow("human_demo", mat_w_text)
        cv2.waitKey(20)

    valid_grids = np.argwhere(all_grids)
    valid_grid_center = valid_grids * grid_size + L_min
    valid_grid_color = grid_colors[valid_grids[:, 0], valid_grids[:, 1],
                                   valid_grids[:, 2]]

    if args.filter_height:
        height_lim = [-0.1, 2.3]
        valid_idx = np.logical_and(valid_grid_center[:, 1] >= height_lim[0],
                                   valid_grid_center[:, 1] <= height_lim[1])

        valid_grid_center = valid_grid_center[valid_idx]
        valid_grid_color = valid_grid_color[valid_idx]
        valid_grids = valid_grids[valid_idx]

    loc_map = env.gen_locmap()

    obs_map = env.house.obsMap.T
    loc_map[valid_grids[:, 2], valid_grids[:, 0], :] = np.array([250, 120, 120])
    loc_map[obs_map == 1] = 0

    loc_map = cv2.resize(loc_map, env.resolution)
    cv2.imwrite('seen_map.png', loc_map)
    if USE_OPEN3D:
        pcd = PointCloud()
        pcd.points = Vector3dVector(valid_grid_center)
        pcd.colors = Vector3dVector(valid_grid_color / 255.0)
        coord = create_mesh_coordinate_frame(3, [40, 0, 35])
        draw_geometries([pcd, coord])