Example #1
def create_env(house="02f594bb5d8c5871bde0d8c8db20125b"):
    api = objrender.RenderAPI(w=600, h=450, device=0)
    cfg = create_default_config('/home/rythei/guide/SUNCG/house')

    if house is None:
        env = Environment(api, TEST_HOUSE, cfg)
    else:
        env = Environment(api, house, cfg)
    env.reset()

    return env
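A minimal usage sketch for this factory, assuming the Environment behavior shown in the later examples (move_forward returns False on collision, render() returns an RGB numpy array):

env = create_env()
for _ in range(20):
    if not env.move_forward(0.5, 0.0):  # False signals a collision
        env.reset()                     # re-sample a start position
    frame = env.render()                # RGB frame as a numpy array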
Example #2
    def _initialize(self):
        h, w = self.screen_size
        api = objrender.RenderAPI(w=w, h=h, device=0)
        env = Environment(api, self.scene, self.configuration)
        env.reset()
        env = RoomNavTask(env,
                          discrete_action=True,
                          depth_signal=False,
                          segment_input=False,
                          reward_type=None,
                          hardness=self.hardness)
        self._env = env
Example #3
def GymHouseEnv2(scene='05cac5f7fdd5f8138234164e76a97383',
                 screen_size=(84, 84),
                 goals=['living_room'],
                 configuration=None):
    h, w = screen_size
    api = objrender.RenderAPI(w=w, h=h, device=0)
    env = Environment(api, scene, create_configuration(configuration))
    env.reset()
    env = RoomNavTask2(env,
                       discrete_action=True,
                       depth_signal=False,
                       segment_input=False,
                       reward_type=None)
    env.observation_space.dtype = np.uint8
    return GymHouseWrapper(env, room_types=goals, screen_size=screen_size)
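A usage sketch for the gym-style factory above. GymHouseWrapper is not shown here, so the reset/step interface below is an assumption based on the standard gym contract:

env = GymHouseEnv2(goals=['kitchen'])
obs = env.reset()
obs, reward, done, info = env.step(0)  # hypothetical discrete action index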
Example #4
def worker(idx, house_id, device):
    colormapFile = "../metadata/colormap_coarse.csv"
    api = objrender.RenderAPI(w=args.width, h=args.height, device=device)
    env = Environment(api, house_id, cfg)
    N = 15000
    start = time.time()
    cnt = 0
    env.reset()
    for t in range(N):
        cnt += 1
        env.move_forward(random.random() * 3, random.random() * 3)
        mat = env.render()
        if cnt % 50 == 0:
            env.reset()
    end = time.time()
    print("Worker {}, speed {:.3f} fps".format(idx, N / (end - start)))
Example #5
    def test_render(self):
        api = objrender.RenderAPI(w=SIDE, h=SIDE, device=0)
        cfg = load_config('config.json')
        houseID, house = find_first_good_house(cfg)
        env = Environment(api, house, cfg)
        location = house.getRandomLocation(ROOM_TYPE)
        env.reset(*location)

        # Check RGB
        env.set_render_mode(RenderMode.RGB)
        rgb = env.render_cube_map()
        self.assertEqual(rgb.shape, (SIDE, SIDE * 6, 3))

        # Check SEMANTIC
        env.set_render_mode(RenderMode.SEMANTIC)
        semantic = env.render_cube_map()
        self.assertEqual(semantic.shape, (SIDE, SIDE * 6, 3))

        # Check INSTANCE
        env.set_render_mode(RenderMode.INSTANCE)
        instance = env.render_cube_map()
        self.assertEqual(instance.shape, (SIDE, SIDE * 6, 3))

        # Check DEPTH
        env.set_render_mode(RenderMode.DEPTH)
        depth = env.render_cube_map()
        self.assertEqual(depth.dtype, np.uint8)
        self.assertEqual(depth.shape, (SIDE, SIDE * 6, 2))
        depth_value = depth[0, 0, 0] * 20.0 / 255.0

        # Check INVDEPTH
        env.set_render_mode(RenderMode.INVDEPTH)
        invdepth = env.render_cube_map()
        self.assertEqual(invdepth.dtype, np.uint8)
        self.assertEqual(invdepth.shape, (SIDE, SIDE * 6, 3))

        # Convert to 16 bit and then to depth
        id16 = 256 * invdepth[:, :, 0] + invdepth[:, :, 1]
        depth2 = depth_of_inverse_depth(id16)
        self.assertEqual(depth2.dtype, np.float64)  # np.float alias is removed in modern NumPy
        self.assertEqual(depth2.shape, (SIDE, SIDE * 6))

        # Check that depth value is within 5% of depth from RenderMode.DEPTH
        self.assertAlmostEqual(
            depth2[0, 0], depth_value, delta=depth_value * 0.05)
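The * 20.0 / 255.0 scaling above is the same depth decoding used in Examples #8, #11, and #13: channel 0 is quantized depth and channel 1 flags pixels at infinity. Extracted as a standalone helper:

import numpy as np

def decode_depth(depth_img):
    """Convert House3D's two-channel uint8 depth render to meters."""
    infmask = depth_img[:, :, 1]                 # nonzero where depth is infinite
    depth = depth_img[:, :, 0] * (infmask == 0)  # zero out infinite pixels
    return depth.astype(np.float32) / 255.0 * 20.0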
Example #6
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import cv2

from House3D import objrender, Environment, load_config

if __name__ == '__main__':
    api = objrender.RenderAPI(w=600, h=450, device=0)
    cfg = load_config('config.json')

    env = Environment(api, '00065ecbdd7300d35ef4328ffe871505', cfg)
    env.reset()  # put the agent into the house

    # fourcc = cv2.VideoWriter_fourcc(*'X264')
    # writer = cv2.VideoWriter('out.avi', fourcc, 30, (1200, 900))
    while True:
        mat = env.debug_render()
        # writer.write(mat)
        cv2.imshow("aaa", mat)
        key = cv2.waitKey(0)
        if not env.keyboard_control(key):
            break
    # writer.release()
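To actually record the session, the commented-out writer lines can be enabled; a sketch keeping the 1200x900 frame size from the comment (the XVID codec here is an assumption: 'X264' requires an x264-enabled OpenCV build, and the frame size must match what debug_render returns):

fourcc = cv2.VideoWriter_fourcc(*'XVID')
writer = cv2.VideoWriter('out.avi', fourcc, 30, (1200, 900))
while True:
    mat = env.debug_render()
    writer.write(mat)                 # frame size must match the writer's
    cv2.imshow("aaa", mat)
    if not env.keyboard_control(cv2.waitKey(0)):
        break
writer.release()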
Example #7
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import tqdm
import cv2

from House3D import objrender, Environment, load_config


if __name__ == '__main__':
    api = objrender.RenderAPI(w=600, h=450, device=0)
    cfg = load_config('config.json')

    env = Environment(api, '00065ecbdd7300d35ef4328ffe871505', cfg)

    # fourcc = cv2.VideoWriter_fourcc(*'X264')
    # writer = cv2.VideoWriter('out.avi', fourcc, 30, (1200, 900))
    for t in tqdm.trange(1000):
        if t % 1000 == 0:
            env.reset()
        mat = env.debug_render()
        # writer.write(mat)
        cv2.imshow("aaa", mat)
        key = cv2.waitKey(0)
        if not env.keyboard_control(key):
            break
    # writer.release()
Example #8
class House3DRGBD:
    def __init__(self,
                 train_mode=True,
                 area_reward_scale=1,
                 collision_penalty=0.1,
                 step_penalty=0.0005,
                 max_depth=3.0,
                 render_door=False,
                 start_indoor=False,
                 ignore_collision=False,
                 ob_dilation_kernel=5,
                 large_map_size=80):
        self.seed()
        self.configs = get_configs()
        self.configs['large_map_size'] = large_map_size
        self.env = None

        self.train_mode = train_mode
        self.render_door = render_door
        self.ignore_collision = ignore_collision
        self.start_indoor = start_indoor
        self.render_height = self.configs['render_height']
        self.render_width = self.configs['render_width']
        self.img_height = self.configs['output_height']
        self.img_width = self.configs['output_width']
        self.ob_dilation_kernel = ob_dilation_kernel
        self.config = load_config(self.configs['path'],
                                  prefix=self.configs['par_path'])
        self.move_sensitivity = self.configs['move_sensitivity']
        self.rot_sensitivity = self.configs['rot_sensitivity']
        self.train_houses = self.configs['train_houses']
        self.test_houses = self.configs['test_houses']

        if train_mode:
            self.houses_id = self.train_houses
            # print("Number of traning houses:", len(self.houses_id))
        else:
            self.houses_id = None  # self.test_houses
        self.depth_threshold = (0, max_depth)
        self.area_reward_scale = area_reward_scale
        self.collision_penalty = collision_penalty
        self.step_penalty = step_penalty
        self.observation_space = [self.img_width, self.img_height, 3]
        self.action_space = [6]

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self, house_id=None, x=None, y=None, yaw=None):
        if house_id is None:
            house_id = self.np_random.choice(self.houses_id, 1)[0]
        self.hid = house_id
        if self.env is not None:
            del self.api
            del self.env
        self.api = objrender.RenderAPI(self.render_width,
                                       self.render_height,
                                       device=RENDERING_GPU)

        self.env = Environment(self.api,
                               house_id,
                               self.config,
                               GridDet=self.configs['GridDet'],
                               RenderDoor=self.render_door,
                               StartIndoor=self.start_indoor)

        if not self.train_mode:
            self.loc_map = self.env.gen_locmap()
            obs_map = self.env.house.obsMap.T
            self.obs_pos = obs_map == 1
            self.traj = []
            self.traj_actions = []
            self.grid_traj = []

        self.L_min = self.env.house.L_lo
        self.L_max = self.env.house.L_hi
        self.grid_size = self.env.house.grid_det
        grid_num = np.array(
            [self.env.house.n_row[0] + 1, self.env.house.n_row[1] + 1])
        self.grids_mat = np.zeros(tuple(grid_num), dtype=np.uint8)
        self.max_grid_size = np.max(grid_num)
        self.max_seen_area = float(np.prod(grid_num))
        self.env.reset(x=x, y=y, yaw=yaw)
        self.start_pos, self.grid_start_pos = self.get_camera_grid_pos()
        if not self.train_mode:
            print('start pose: ', self.start_pos)
            self.traj.append(self.start_pos.tolist())
            self.grid_traj.append(self.grid_start_pos.tolist())
        rgb, depth, extrinsics = self.get_obs()
        large_loc_map, small_loc_map = self.get_loc_map()
        self.seen_area = self.get_seen_area(rgb, depth, extrinsics,
                                            self.grids_mat)

        self.ep_len = 0
        self.ep_reward = 0
        self.collision_times = 0
        ob = (self.resize_img(rgb), large_loc_map, small_loc_map)
        return ob

    def step(self, action):
        if self.ignore_collision:
            collision_flag = self.motion_primitive_no_check(action)
        else:
            collision_flag = self.motion_primitive(action)
        rgb, depth, extrinsics = self.get_obs()
        large_loc_map, small_loc_map = self.get_loc_map()
        if not self.train_mode:
            current_pos, grid_current_pos = self.get_camera_grid_pos()
            self.traj_actions.append(int(action))
            self.traj.append(current_pos.tolist())
            self.grid_traj.append(grid_current_pos.tolist())

        reward, seen_area, raw_reward = self.cal_reward(
            rgb, depth, extrinsics, collision_flag)
        self.ep_len += 1
        self.ep_reward += reward
        if collision_flag:
            self.collision_times += 1
        info = {
            'reward_so_far': self.ep_reward,
            'steps_so_far': self.ep_len,
            'seen_area': seen_area,
            'collisions': self.collision_times,
            'start_pose': self.start_pos,
            'house_id': self.hid,
            'collision_flag': collision_flag
        }
        info = {**info, **raw_reward}
        done = False
        ob = (self.resize_img(rgb), large_loc_map, small_loc_map)
        return ob, reward, done, info

    def render(self):
        loc_map, small_map = self.get_loc_map()

        if not self.train_mode:
            rad = self.env.house.robotRad / self.env.house.grid_det
            x = int(loc_map.shape[0] / 2)
            y = int(loc_map.shape[1] / 2)
            cv2.circle(loc_map, (x, y), 1, (255, 0, 255), thickness=-1)
            x = int(small_map.shape[0] / 2)
            y = int(small_map.shape[1] / 2)
            cv2.circle(small_map, (x, y),
                       int(rad) * 2, (255, 0, 255),
                       thickness=-1)

        loc_map = cv2.resize(loc_map, (self.render_width, self.render_height),
                             interpolation=cv2.INTER_CUBIC)
        if not self.train_mode:
            x = int(loc_map.shape[0] / 2)
            y = int(loc_map.shape[1] / 2)
            cv2.circle(loc_map, (x, y), 2, (255, 0, 255), thickness=-1)
        self.env.set_render_mode('rgb')
        rgb = self.env.render()

        img = np.concatenate((rgb, loc_map), axis=1)
        img = img[:, :, ::-1]
        img = cv2.resize(img, (img.shape[1] * 3, img.shape[0] * 3),
                         interpolation=cv2.INTER_CUBIC)
        cv2.imshow("nav", img)
        cv2.waitKey(40)

    def resize_img(self, img):
        return cv2.resize(img, (self.img_width, self.img_height),
                          interpolation=cv2.INTER_AREA)

    def motion_primitive(self, action):
        # 0: Forward
        # 1: Turn Left
        # 2: Turn Right
        # 3: Strafe Left
        # 4: Strafe Right
        # 5: Backward
        collision_flag = False
        if action == 0:
            if not self.train_mode:
                print('Action: Forward')
            if not self.env.move_forward(self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot move forward, collision!!!')
                collision_flag = True
        elif action == 1:
            if not self.train_mode:
                print('Action: Turn Left')
            self.env.rotate(-self.rot_sensitivity)
        elif action == 2:
            if not self.train_mode:
                print('Action: Turn Right')
            self.env.rotate(self.rot_sensitivity)
        elif action == 3:
            if not self.train_mode:
                print('Action: Strafe Left')
            if not self.env.move_forward(dist_fwd=0,
                                         dist_hor=-self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot strafe left, collision!!!')
                collision_flag = True
        elif action == 4:
            if not self.train_mode:
                print('Action: Strafe Right')
            if not self.env.move_forward(dist_fwd=0,
                                         dist_hor=self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot strafe right, collision!!!')
                collision_flag = True
        elif action == 5:
            if not self.train_mode:
                print('Action: Backward')
            if not self.env.move_forward(-self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot move backward, collision!!!')
                collision_flag = True
        else:
            raise ValueError('unknown action type: [{0:d}]'.format(action))
        return collision_flag

    def move_forward(self, dist_fwd, dist_hor=0):
        """
        Move `dist_fwd` units forward and `dist_hor` units to the right.
        Both distances are floats. Collision checking is deliberately skipped.
        """
        pos = self.env.cam.pos
        pos = pos + self.env.cam.front * dist_fwd
        pos = pos + self.env.cam.right * dist_hor
        self.env.cam.pos.x = pos.x
        self.env.cam.pos.z = pos.z

    def motion_primitive_no_check(self, action):
        # motion primitive without collision checking
        # 0: Forward
        # 1: Turn Left
        # 2: Turn Right
        # 3: Strafe Left
        # 4: Strafe Right
        # 5: Backward
        collision_flag = False
        if action == 0:
            self.move_forward(self.move_sensitivity)
        elif action == 1:
            self.env.rotate(-self.rot_sensitivity)
        elif action == 2:
            self.env.rotate(self.rot_sensitivity)
        elif action == 3:
            self.move_forward(dist_fwd=0, dist_hor=-self.move_sensitivity)
        elif action == 4:
            self.move_forward(dist_fwd=0, dist_hor=self.move_sensitivity)
        elif action == 5:
            self.move_forward(-self.move_sensitivity)
        else:
            raise ValueError('unknown action type: [{0:d}]'.format(action))
        return collision_flag

    def get_obs(self):
        self.env.set_render_mode('rgb')
        rgb = self.env.render()
        self.env.set_render_mode('depth')
        depth = self.env.render()
        infmask = depth[:, :, 1]
        depth = depth[:, :, 0] * (infmask == 0)
        true_depth = depth.astype(np.float32) / 255.0 * 20.0
        extrinsics = self.env.cam.getExtrinsicsNumpy()
        return rgb, true_depth, extrinsics

    def get_seen_area(self, rgb, depth, extrinsics, out_mat, inv_E=True):
        points, points_colors = gen_point_cloud(
            depth,
            rgb,
            extrinsics,
            depth_threshold=self.depth_threshold,
            inv_E=inv_E)
        grid_locs = np.floor(
            (points[:, [0, 2]] - self.L_min) / self.grid_size).astype(int)
        grids_mat = np.zeros(
            (self.grids_mat.shape[0], self.grids_mat.shape[1]), dtype=np.uint8)

        high_filter_idx = points[:, 1] < HEIGHT_THRESHOLD[1]
        low_filter_idx = points[:, 1] > HEIGHT_THRESHOLD[0]
        obstacle_idx = np.logical_and(high_filter_idx, low_filter_idx)

        self.safe_assign(grids_mat, grid_locs[high_filter_idx, 0],
                         grid_locs[high_filter_idx, 1], 2)
        kernel = np.ones((3, 3), np.uint8)
        grids_mat = cv2.morphologyEx(grids_mat, cv2.MORPH_CLOSE, kernel)

        obs_mat = np.zeros((self.grids_mat.shape[0], self.grids_mat.shape[1]),
                           dtype=np.uint8)
        self.safe_assign(obs_mat, grid_locs[obstacle_idx, 0],
                         grid_locs[obstacle_idx, 1], 1)
        kernel = np.ones((self.ob_dilation_kernel, self.ob_dilation_kernel),
                         np.uint8)
        obs_mat = cv2.morphologyEx(obs_mat, cv2.MORPH_CLOSE, kernel)
        obs_idx = np.where(obs_mat == 1)
        self.safe_assign(grids_mat, obs_idx[0], obs_idx[1], 1)
        out_mat[np.where(grids_mat == 2)] = 2
        out_mat[np.where(grids_mat == 1)] = 1
        seen_area = np.sum(out_mat > 0)
        return seen_area

    def cal_reward(self, rgb, depth, extrinsics, collision_flag):
        filled_grid_num = self.get_seen_area(rgb,
                                             depth,
                                             extrinsics,
                                             self.grids_mat,
                                             inv_E=True)
        area_reward = (filled_grid_num - self.seen_area)
        reward = area_reward * self.area_reward_scale
        if collision_flag:
            reward -= self.collision_penalty
        reward -= self.step_penalty
        self.seen_area = filled_grid_num
        raw_reward = {'area': area_reward, 'collision_flag': collision_flag}
        return reward, filled_grid_num, raw_reward

    def get_loc_map(self):
        top_down_map = self.grids_mat.T.copy()

        half_size = max(top_down_map.shape[0], top_down_map.shape[1],
                        self.configs['large_map_range']) * 3
        ego_map = np.ones(
            (half_size * 2, half_size * 2, 3), dtype=np.uint8) * 255
        loc_map = np.zeros((top_down_map.shape[0], top_down_map.shape[1], 3),
                           dtype=np.uint8)
        loc_map[top_down_map == 0] = np.array([255, 255, 255])
        loc_map[top_down_map == 1] = np.array([0, 0, 255])
        loc_map[top_down_map == 2] = np.array([0, 255, 0])
        current_pos, grid_current_pos = self.get_camera_grid_pos()
        x_start = half_size - grid_current_pos[1]
        y_start = half_size - grid_current_pos[0]
        x_end = x_start + top_down_map.shape[0]
        y_end = y_start + top_down_map.shape[1]
        assert x_start >= 0 and y_start >= 0 and \
               x_end <= ego_map.shape[0] and y_end <= ego_map.shape[1]
        ego_map[x_start:x_end, y_start:y_end] = loc_map
        center = (half_size, half_size)
        rot_angle = self.constrain_to_pm_pi(90 + current_pos[2])
        M = cv2.getRotationMatrix2D(center, rot_angle, 1.0)
        ego_map = cv2.warpAffine(ego_map,
                                 M, (ego_map.shape[1], ego_map.shape[0]),
                                 flags=cv2.INTER_AREA,
                                 borderMode=cv2.BORDER_CONSTANT,
                                 borderValue=(255, 255, 255))
        start = half_size - self.configs['small_map_range']
        end = half_size + self.configs['small_map_range']
        small_ego_map = ego_map[start:end, start:end]

        start = half_size - self.configs['large_map_range']
        end = half_size + self.configs['large_map_range']
        assert start >= 0
        assert end <= ego_map.shape[0]
        large_ego_map = ego_map[start:end, start:end]
        return cv2.resize(
            large_ego_map,
            (self.configs['large_map_size'], self.configs['large_map_size']),
            interpolation=cv2.INTER_AREA), small_ego_map

    def safe_assign(self, im_map, x_idx, y_idx, value):
        try:
            im_map[x_idx, y_idx] = value
        except IndexError:
            valid_idx1 = np.logical_and(x_idx >= 0, x_idx < im_map.shape[0])
            valid_idx2 = np.logical_and(y_idx >= 0, y_idx < im_map.shape[1])
            valid_idx = np.logical_and(valid_idx1, valid_idx2)
            im_map[x_idx[valid_idx], y_idx[valid_idx]] = value

    def constrain_to_pm_pi(self, theta):
        # make sure theta is within [-180, 180)
        return (theta + 180) % 360 - 180

    def get_camera_grid_pos(self):
        current_pos = np.array([
            self.env.cam.pos.x, self.env.cam.pos.z,
            self.constrain_to_pm_pi(self.env.cam.yaw)
        ])
        grid_pos = np.array(
            self.env.house.to_grid(current_pos[0], current_pos[1]))
        return current_pos, grid_pos

    def truncated_norm(self, mu, sigma, lower_limit, upper_limit, size):
        if sigma == 0:
            return mu
        lower_limit = (lower_limit - mu) / sigma
        upper_limit = (upper_limit - mu) / sigma
        r = truncnorm(lower_limit, upper_limit, loc=mu, scale=sigma)
        return r.rvs(size)
Example #9
    samples_per_room = 20
    screen_size = (512, 512)
    cfg = create_configuration(deep_rl.configuration.get('house3d').as_dict())
    room_target_object = load_target_object_data(cfg['roomTargetFile'])

    api = objrender.RenderAPI(w=screen_size[1], h=screen_size[0], device=0)
    for i, houseID in enumerate(houses):
        print('Processing house %s (%s/%s)' % (houseID, i + 1, len(houses)))
        house = create_house(houseID, cfg)
        env = Environment(api, house, cfg)
        types_to_rooms = get_valid_room_dict(house)
        
        for room_type, rooms in types_to_rooms.items():
            print('Processing house %s (%s/%s) - %s' % (houseID, i + 1, len(houses), room_type))
            
            locations = get_valid_locations(house, rooms)
            if len(locations) == 0:
                print('ERROR: no locations for room %s' % room_type)
                continue

            for j in range(samples_per_room):
                #print_progress(0, samples_per_room)
                location = sample_true_object(room_target_object, env, house, locations, room_type)
                if location is not None:
                    env.reset(*location[1])
                    render_current_location(env, houseID, room_type, j, cfg)

            #print_progress(samples_per_room, samples_per_room)

        
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file',
                        default='../path_data/eqa_humans.json',
                        type=str)
    parser.add_argument('--demo_id', default=0, type=int)
    parser.add_argument('--width', type=int, default=600)
    parser.add_argument('--height', type=int, default=450)
    parser.add_argument('--no_display',
                        action='store_true',
                        help='disable display on screen')
    parser.add_argument('--play_mode',
                        default='auto',
                        type=str,
                        choices=['key', 'auto'],
                        help='play the human demonstration in '
                        'keyboard-control mode or auto-play mode')
    args = parser.parse_args()
    with open(args.data_file, 'r') as f:
        data = json.load(f)
    demo = data[args.demo_id]
    print('Total time steps:', len(demo['loc']))
    locs = np.array(demo['loc'][1:])
    ques = demo['question']
    answer = demo['answer']
    text = 'Q: {0:s}   A: {1:s}'.format(ques, answer)
    text_height = 60
    auto_freq = 15  # 15 hz

    cfg = load_config('config.json')
    api = objrender.RenderAPI(w=args.width, h=args.height, device=0)
    env = Environment(api, demo['house_id'], cfg, GridDet=0.05)

    save_folder = 'demo_{0:d}'.format(args.demo_id)
    os.makedirs(save_folder, exist_ok=True)

    if not args.no_display:
        print_keyboard_help(args.play_mode)
        t = 0
        while True:
            # print(t)
            if t < len(locs):
                env.reset(x=locs[t][0], y=locs[t][2], yaw=locs[t][3])
            mat = get_mat(env, text_height, text)
            cv2.imshow("human_demo", mat)
            if args.play_mode == 'key':
                key = cv2.waitKey(0)
                if key == 27 or key == ord('q'):  # esc or 'q'
                    break
                elif key == ord('n'):
                    if t < len(locs) - 1:
                        t += 1
                elif key == ord('p'):
                    if t > 0:
                        t -= 1
                elif key == ord('r'):
                    t = 0
                else:
                    print("Unknown key: {}".format(key))
            else:
                key = cv2.waitKey(int(1000 / auto_freq))
                if key == 27 or key == ord('q'):  # esc or 'q'
                    break
                elif key == -1:
                    if t < len(locs) - 1:
                        t += 1
                elif key == ord('r'):
                    t = 0
                    auto_freq = 15
                elif key == ord('i'):
                    if auto_freq < 30:
                        auto_freq += 4
                elif key == ord('d'):
                    if auto_freq > 3:
                        auto_freq -= 4
                else:
                    print("Unknown key: {}".format(key))
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file',
                        default='../path_data/cleaned_human_demo_data.json',
                        type=str)
    parser.add_argument('--demo_id', default=0, type=int)
    parser.add_argument('--width', type=int, default=600)
    parser.add_argument('--height', type=int, default=450)
    parser.add_argument('--filter_height', type=bool, default=True)
    args = parser.parse_args()
    with open(args.data_file, 'r') as f:
        data = json.load(f)
    demo = data[args.demo_id]
    print('Total time steps:', len(demo['loc']))
    locs = np.array(demo['loc'][1:])
    ques = demo['question']
    answer = demo['answer']
    text = 'Q: {0:s}   A: {1:s}'.format(ques, answer)
    text_height = 60

    cfg = load_config('config.json')
    api = objrender.RenderAPI(w=args.width, h=args.height, device=0)
    env = Environment(api, demo['house_id'], cfg)

    L_min = env.house.L_min_coor
    L_max = env.house.L_max_coor
    L_min = np.array([[env.house.L_lo[0], L_min[1], env.house.L_lo[1]]])
    L_max = np.array([[env.house.L_hi[0], L_max[1], env.house.L_hi[1]]])
    grid_size = env.house.grid_det
    n_row = env.house.n_row
    grid_num = np.array([
        n_row[0] + 1,
        int((L_max[0][1] - L_min[0][1]) / (grid_size + 1e-8)) + 1, n_row[1] + 1
    ])
    print('Grid size:', grid_size)
    print('Number of grid in [x, y, z]:', grid_num)

    all_grids = np.zeros(tuple(grid_num), dtype=bool)
    grid_colors = np.zeros(tuple(grid_num) + (3, ), dtype=np.uint8)
    loc_map = env.gen_locmap()
    obs_map = env.house.obsMap.T
    obs_pos = obs_map == 1
    for t in tqdm(range(len(locs))):
        env.reset(x=locs[t][0], y=locs[t][2], yaw=locs[t][3])
        depth = env.render(RenderMode.DEPTH)
        rgb = env.render(RenderMode.RGB)
        semantic = env.render(RenderMode.SEMANTIC)
        infmask = depth[:, :, 1]
        depth = depth[:, :, 0] * (infmask == 0)
        true_depth = depth.astype(np.float32) / 255.0 * 20.0
        extrinsics = env.cam.getExtrinsicsNumpy()
        points, points_colors = gen_point_cloud(true_depth, rgb, extrinsics)
        grid_locs = np.floor((points - L_min) / grid_size).astype(int)
        all_grids[grid_locs[:, 0], grid_locs[:, 1], grid_locs[:, 2]] = True
        grid_colors[grid_locs[:, 0], grid_locs[:, 1],
                    grid_locs[:, 2]] = points_colors
        depth = np.stack([depth] * 3, axis=2)

        loc_map[grid_locs[:, 2], grid_locs[:, 0], :] = np.array([250, 120, 120])
        loc_map[obs_pos] = 0
        rad = env.house.robotRad / env.house.grid_det
        x, y = env.cam.pos.x, env.cam.pos.z
        x, y = env.house.to_grid(x, y)
        loc_map_cp = loc_map.copy()
        cv2.circle(loc_map_cp, (x, y), int(rad), (0, 0, 255), thickness=-1)
        loc_map_resized = cv2.resize(loc_map_cp, env.resolution)
        concat1 = np.concatenate((rgb, semantic), axis=1)
        concat2 = np.concatenate((depth, loc_map_resized), axis=1)
        ret = np.concatenate((concat1, concat2), axis=0)
        ret = ret[:, :, ::-1]
        pad_text = np.ones((text_height, ret.shape[1], ret.shape[2]),
                           dtype=np.uint8)
        mat_w_text = np.concatenate((np.copy(ret), pad_text), axis=0)
        cv2.putText(mat_w_text, text, (10, mat_w_text.shape[0] - 10), FONT, 1,
                    (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow("human_demo", mat_w_text)
        cv2.waitKey(20)

    valid_grids = np.argwhere(all_grids)
    valid_grid_center = valid_grids * grid_size + L_min
    valid_grid_color = grid_colors[valid_grids[:, 0], valid_grids[:, 1],
                                   valid_grids[:, 2]]

    if args.filter_height:
        height_lim = [-0.1, 2.3]
        valid_idx = np.logical_and(valid_grid_center[:, 1] >= height_lim[0],
                                   valid_grid_center[:, 1] <= height_lim[1])

        valid_grid_center = valid_grid_center[valid_idx]
        valid_grid_color = valid_grid_color[valid_idx]
        valid_grids = valid_grids[valid_idx]

    loc_map = env.gen_locmap()

    obs_map = env.house.obsMap.T
    loc_map[valid_grids[:, 2], valid_grids[:, 0], :] = np.array([250, 120, 120])
    loc_map[obs_map == 1] = 0

    loc_map = cv2.resize(loc_map, env.resolution)
    cv2.imwrite('seen_map.png', loc_map)
    if USE_OPEN3D:
        pcd = PointCloud()
        pcd.points = Vector3dVector(valid_grid_center)
        pcd.colors = Vector3dVector(valid_grid_color / 255.0)
        coord = create_mesh_coordinate_frame(3, [40, 0, 35])
        draw_geometries([pcd, coord])
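The Open3D names used above (PointCloud, Vector3dVector, draw_geometries, create_mesh_coordinate_frame) come from the pre-0.8 flat namespace; a sketch of the guarded import this example presumably relies on:

try:
    from open3d import (PointCloud, Vector3dVector, draw_geometries,
                        create_mesh_coordinate_frame)
    USE_OPEN3D = True
except ImportError:
    USE_OPEN3D = False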
Example #12
house_ids = [
    '04f4590d85e296b4c81c5a62f8a99bce', '0880799c157b4dff08f90db221d7f884',
    '0a12acd2a7039cb122c297fa5b145912', '0b9285c9f090123caaae85500d48ea8f',
    '0c90efff2ab302c6f31add26cd698bea', '1dba3a1039c6ec1a3c141a1cb0ad0757',
    '5cf0e1e9493994e483e985c436b9d3bc', '775941abe94306edc1b5820e3a992d75'
    # 'a7e248efcdb6040c92ac0cdc3b2351a6',
    # 'f10ce4008da194626f38f937fb9c1a03'
]
if __name__ == '__main__':
    api = objrender.RenderAPI(w=600, h=450, device=0)
    cfg = load_config(CONFIGFILEPATH)
    houseid = '5cf0e1e9493994e483e985c436b9d3bc'
    env = Environment(api, houseid, cfg)
    # '0a0b9b45a1db29832dd84e80c1347854'
    env.reset(yaw=0)  # put the agent into the house

    # fourcc = cv2.VideoWriter_fourcc(*'X264')
    # writer = cv2.VideoWriter('out.avi', fourcc, 30, (1200, 900))
    with open('%s/%s/map.txt' % (HOUSEDIR, houseid), 'r') as mapfile:
        origin_coor = [
            float(i) for i in mapfile.readline().strip('\n').split(' ')
        ]
    smap = env.house.smap
    fmap = env.gen_2dfmap()
    L_lo = env.house.L_lo
    L_hi = env.house.L_hi
    L_det = env.house.L_det
    n_row = env.house.n_row
    robotRad = env.house.robotRad
    savedata = {
Example #13
class House3DEnv(gym.Env):
    def __init__(self,
                 train_mode=True,
                 area_reward_scale=0.0005,
                 collision_penalty=0.01,
                 step_penalty=0.0,
                 max_depth=2.0,
                 render_door=False,
                 start_indoor=True,
                 ignore_collision=False,
                 ob_dilation_kernel=5,
                 depth_signal=True,
                 max_steps=500):
        self.seed()
        self.configs = get_configs()
        self.env = None

        self.train_mode = train_mode
        self.render_door = render_door
        self.ignore_collision = ignore_collision
        self.start_indoor = start_indoor
        self.render_height = self.configs['render_height']
        self.render_width = self.configs['render_width']
        self.ob_dilation_kernel = ob_dilation_kernel
        self.config = load_config(self.configs['path'],
                                  prefix=self.configs['par_path'])
        self.move_sensitivity = self.configs['move_sensitivity']
        self.rot_sensitivity = self.configs['rot_sensitivity']
        self.train_houses = self.configs['train_houses']
        self.test_houses = self.configs['test_houses']

        if train_mode:
            self.houses_id = self.train_houses
        else:
            self.houses_id = self.test_houses
        self.depth_threshold = (0, max_depth)
        self.area_reward_scale = area_reward_scale
        self.collision_penalty = collision_penalty
        self.step_penalty = step_penalty
        self.max_step = max_steps

        n_channel = 3
        if depth_signal:
            n_channel += 1

        self.observation_shape = (self.render_width, self.render_height,
                                  n_channel)
        self.observation_space = spaces.Box(0,
                                            255,
                                            shape=self.observation_shape,
                                            dtype=np.uint8)

        #self.action_space = spaces.Discrete(n_discrete_actions)
        self.action_space = spaces.Box(low=np.array([0.0, -1.0, -1.0]),
                                       high=np.array([1.0, 1.0, 1.0]),
                                       dtype=np.float32)

        self.tracker = []
        self.num_rotate = 0
        self.right_rotate = 0

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def constrain_to_pm_pi(self, theta):
        return (theta + 180) % 360 - 180

    def get_camera_grid_pos(self):
        current_pos = np.array([
            self.env.cam.pos.x, self.env.cam.pos.z,
            self.constrain_to_pm_pi(self.env.cam.yaw)
        ])
        grid_pos = np.array(
            self.env.house.to_grid(current_pos[0], current_pos[1]))
        return current_pos, grid_pos

    def get_obs(self):
        self.env.set_render_mode('rgb')
        rgb = self.env.render()
        self.env.set_render_mode('depth')
        depth = self.env.render()
        infmask = depth[:, :, 1]
        depth = depth[:, :, 0] * (infmask == 0)
        true_depth = depth.astype(np.float32) / 255.0 * 20.0
        extrinsics = self.env.cam.getExtrinsicsNumpy()
        return rgb, np.expand_dims(depth, -1), true_depth, extrinsics

    def safe_assign(self, im_map, x_idx, y_idx, value):
        try:
            im_map[x_idx, y_idx] = value
        except IndexError:
            valid_idx1 = np.logical_and(x_idx >= 0, x_idx < im_map.shape[0])
            valid_idx2 = np.logical_and(y_idx >= 0, y_idx < im_map.shape[1])
            valid_idx = np.logical_and(valid_idx1, valid_idx2)
            im_map[x_idx[valid_idx], y_idx[valid_idx]] = value

    def get_seen_area(self, rgb, depth, extrinsics, out_mat, inv_E=True):
        points, points_colors = gen_point_cloud(
            depth,
            rgb,
            extrinsics,
            depth_threshold=self.depth_threshold,
            inv_E=inv_E)
        grid_locs = np.floor(
            (points[:, [0, 2]] - self.L_min) / self.grid_size).astype(int)
        grids_mat = np.zeros(
            (self.grids_mat.shape[0], self.grids_mat.shape[1]), dtype=np.uint8)

        high_filter_idx = points[:, 1] < HEIGHT_THRESHOLD[1]
        low_filter_idx = points[:, 1] > HEIGHT_THRESHOLD[0]
        obstacle_idx = np.logical_and(high_filter_idx, low_filter_idx)

        self.safe_assign(grids_mat, grid_locs[high_filter_idx, 0],
                         grid_locs[high_filter_idx, 1], 2)
        kernel = np.ones((3, 3), np.uint8)
        grids_mat = cv2.morphologyEx(grids_mat, cv2.MORPH_CLOSE, kernel)

        obs_mat = np.zeros((self.grids_mat.shape[0], self.grids_mat.shape[1]),
                           dtype=np.uint8)
        self.safe_assign(obs_mat, grid_locs[obstacle_idx, 0],
                         grid_locs[obstacle_idx, 1], 1)
        kernel = np.ones((self.ob_dilation_kernel, self.ob_dilation_kernel),
                         np.uint8)
        obs_mat = cv2.morphologyEx(obs_mat, cv2.MORPH_CLOSE, kernel)
        obs_idx = np.where(obs_mat == 1)
        self.safe_assign(grids_mat, obs_idx[0], obs_idx[1], 1)

        out_mat[np.where(grids_mat == 2)] = 2
        out_mat[np.where(grids_mat == 1)] = 1

        seen_area = np.sum(out_mat > 0)
        #cal_seen_area = np.sum(out_mat == 1)
        return seen_area

    def cal_reward(self, rgb, depth, extrinsics, collision_flag):
        if collision_flag:
            reward = -1.0 * self.collision_penalty
            area_reward = 0.0
            filled_grid_num = self.seen_area
        else:
            filled_grid_num = self.get_seen_area(rgb,
                                                 depth,
                                                 extrinsics,
                                                 self.grids_mat,
                                                 inv_E=True)
            area_reward = (filled_grid_num - self.seen_area)
            reward = area_reward * self.area_reward_scale
            reward -= self.step_penalty
        self.seen_area = filled_grid_num
        raw_reward = {'area': area_reward, 'collision_flag': collision_flag}
        return reward, filled_grid_num, raw_reward

    def reset(self, house_id=None, x=None, y=None, yaw=None):
        if not self.train_mode:
            obs_map = self.env.house.obsMap.T
            self.obs_pos = obs_map == 1
            self.traj = []
            self.traj_actions = []
            self.grid_traj = []

        if house_id is None:
            house_id = self.np_random.choice(self.houses_id, 1)[0]
        self.hid = house_id
        if self.env is not None:
            del self.api
            del self.env
        self.api = objrender.RenderAPI(self.render_width,
                                       self.render_height,
                                       device=RENDERING_GPU)

        self.env = Environment(self.api,
                               house_id,
                               self.config,
                               GridDet=self.configs['GridDet'],
                               RenderDoor=self.render_door,
                               StartIndoor=self.start_indoor)

        self.tracker = []
        self.num_rotate = 0
        self.right_rotate = 0
        self.L_min = self.env.house.L_lo
        self.L_max = self.env.house.L_hi
        self.grid_size = self.env.house.grid_det
        grid_num = np.array(
            [self.env.house.n_row[0] + 1, self.env.house.n_row[1] + 1])
        self.grids_mat = np.zeros(tuple(grid_num), dtype=np.uint8)
        self.max_grid_size = np.max(grid_num)
        self.max_seen_area = float(np.prod(grid_num))
        self.env.reset(x=x, y=y, yaw=yaw)
        self.start_pos, self.grid_start_pos = self.get_camera_grid_pos()
        if not self.train_mode:
            self.traj.append(self.start_pos.tolist())
            self.grid_traj.append(self.grid_start_pos.tolist())

        rgb, depth, true_depth, extrinsics = self.get_obs()
        self.seen_area = self.get_seen_area(rgb, true_depth, extrinsics,
                                            self.grids_mat)

        self.ep_len = 0
        self.ep_reward = 0
        self.collision_times = 0
        ret_obs = np.concatenate((rgb, depth), axis=-1)
        return ret_obs

    def motion_primitive(self, action):
        collision_flag = False
        det_fwd = np.clip(action[0] + 0.8, 0.0, 2.0) / 2.0
        tmp_alpha = 0.0
        if action[2] > 0:
            tmp_alpha = 1.0
        elif action[2] < 0:
            tmp_alpha = -1.0
        det_rot = np.clip(action[1] + 0.8, 0.0, 1.6) * 0.5 * tmp_alpha
        move_fwd = det_fwd * self.move_sensitivity
        rotation = det_rot * self.rot_sensitivity

        if not self.env.move_forward(move_fwd, 0.0):
            collision_flag = True
        else:
            self.env.rotate(rotation)

        return collision_flag

    def step(self, action):
        collision_flag = self.motion_primitive(action)

        rgb, depth, true_depth, extrinsics = self.get_obs()

        current_pos, grid_current_pos = self.get_camera_grid_pos()
        if not self.train_mode:
            self.traj_actions.append(int(action))
            self.traj.append(current_pos.tolist())
            self.grid_traj.append(grid_current_pos.tolist())

        self.tracker.append([
            grid_current_pos[0], grid_current_pos[1], self.env.cam.front.x,
            self.env.cam.front.z, self.env.cam.right.x, self.env.cam.right.z
        ])

        if action[1] > -0.8 and action[2] > 0:
            self.right_rotate += 1

        if action[1] > -0.8 and action[2] != 0:
            self.num_rotate += 1

        reward, seen_area, raw_reward = self.cal_reward(
            rgb, true_depth, extrinsics, collision_flag)

        self.ep_len += 1
        self.ep_reward += reward
        if collision_flag:
            self.collision_times += 1
        info = {
            'reward_so_far': self.ep_reward,
            'steps_so_far': self.ep_len,
            'seen_area': seen_area,
            'collisions': self.collision_times,
            'start_pose': self.start_pos,
            'house_id': self.hid,
            'collision_flag': collision_flag,
            'grid_current_pos': grid_current_pos - self.grid_start_pos,
            'current_rot': current_pos[2] - self.start_pos[2]
        }
        info = {**info, **raw_reward, **self.info}

        if self.ep_len >= self.max_step:
            done = True
            info.update({'bad_transition': True})
        else:
            done = False
        ret_obs = np.concatenate((rgb, depth), axis=-1)

        return ret_obs, reward, done, info

    @property
    def house(self):
        return self.env.house

    @property
    def info(self):
        ret = self.env.info
        ret['track'] = self.tracker
        ret['total_rotate'] = self.num_rotate
        ret['right_rotate'] = self.right_rotate
        if not self.train_mode:
            ret['traj_actions'] = self.traj_actions
            ret['traj'] = self.traj
            ret['grid_traj'] = self.grid_traj
        return ret
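A gym-style rollout sketch for House3DEnv, assuming the module-level helpers it references (get_configs, RENDERING_GPU, HEIGHT_THRESHOLD, gen_point_cloud) are available:

env = House3DEnv(train_mode=True, max_steps=100)
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()          # 3-dim continuous action
    obs, reward, done, info = env.step(action)
print('episode reward:', info['reward_so_far'])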