def main(args): c = Controller() c.start() sence = args.sence c.reset("FloorPlan" + str(sence)) t = get_agent_map_data(c) new_frame = add_agent_view_triangle( position_to_tuple(c.last_event.metadata["agent"]["position"]), c.last_event.metadata["agent"]["rotation"]["y"], t["frame"], t["pos_translator"], ) plt.imshow(new_frame) plt.axis('off') plt.savefig('topview.png') plt.show() im = Image.open('topview.png') im = trim(im) im.save('topview.png')
self._update_heatmap(agent_position) overlay = self.cmap(self.heatmap) overlay = (overlay[..., :3] * 255).astype('int') normal_frame = self.topdown_map.copy() semantic_frame = self.topdown_map_semantic.copy() mask = self.heatmap > self.base_heat normal_frame[mask] = overlay[mask] semantic_frame[mask] = overlay[mask] # return (np.fliplr(np.flipud(normal_frame)), # np.fliplr(np.flipud(semantic_frame))) # return (cv2.rotate(normal_frame, cv2.ROTATE_90_COUNTERCLOCKWISE), # cv2.rotate(semantic_frame, cv2.ROTATE_90_COUNTERCLOCKWISE)) return normal_frame, semantic_frame if __name__ == "__main__": controller = Controller() controller.start() controller.reset("FloorPlan1") drawer = TrajectoryDrawer(controller) new_frame = drawer.draw( controller.last_event.metadata["agent"]["position"]) plt.imshow(new_frame) plt.show()
class Environment: """ Abstraction of the ai2thor enviroment. """ def __init__(self, grid_size=0.25, fov=90.0, local_executable_path=None, randomize_objects=False, seed=1): random.seed(seed) self.controller = Controller() if local_executable_path: self.controller.local_executable_path = local_executable_path self.grid_size = grid_size self._reachable_points = {} self.fov = fov self.offline_data_dir = './datasets/floorplans' self.y = None self.randomize_objects = randomize_objects self.stored_scene_name = None @property def scene_name(self): return self.controller.last_event.metadata['sceneName'] @property def current_frame(self): return self.controller.last_event.frame @property def last_event(self): return self.controller.last_event @property def last_action_success(self): return self.controller.last_event.metadata['lastActionSuccess'] def object_is_visible(self, objId): objects = self.last_event.metadata['objects'] visible_objects = [o['objectId'] for o in objects if o['visible']] return objId in visible_objects def start(self, scene_name, gpu_id): """ Begin the scene. """ # self.controller.start(x_display=str(gpu_id)) self.controller.start() self.controller.step({ 'action': 'ChangeQuality', 'quality': 'Very Low' }) self.controller.step({ "action": "ChangeResolution", "x": 224, "y": 224, }) self.controller.reset(scene_name) self.controller.step( dict(action='Initialize', gridSize=self.grid_size, fieldOfView=self.fov)) self.y = self.controller.last_event.metadata['agent']['position']['y'] self.controller.step(dict(action='ToggleHideAndSeekObjects')) self.randomize_agent_location() self.controller.step( dict(action='InitialRandomSpawn', forceVisible=True, maxNumRepeats=10, randomSeed=0)) def reset(self, scene_name): """ Reset the scene. """ self.controller.reset(scene_name) self.controller.step( dict(action='Initialize', gridSize=self.grid_size, fieldOfView=self.fov)) self.y = self.controller.last_event.metadata['agent']['position']['y'] self.controller.step(dict(action='ToggleHideAndSeekObjects')) self.randomize_agent_location() if self.randomize_objects: self.controller.step( dict(action='InitialRandomSpawn', forceVisible=True, maxNumRepeats=10, randomSeed=random.randint(0, 1000000))) else: self.controller.step( dict(action='InitialRandomSpawn', forceVisible=True, maxNumRepeats=10, randomSeed=0)) def all_objects(self): objects = self.controller.last_event.metadata['objects'] return [o['objectId'] for o in objects] def fail(self): self.controller.last_event.metadata['lastActionSuccess'] = False return self.controller.last_event def step(self, action_dict): curr_state = ThorAgentState.get_state_from_evenet( event=self.controller.last_event, forced_y=self.y) next_state = get_next_state(curr_state, action_dict['action'], copy_state=True) if action_dict['action'] in [ 'LookUp', 'LookDown', 'RotateLeft', 'RotateRight', 'MoveAhead', 'LookTomato', 'LookBowl' ]: if next_state is None: self.last_event.metadata['lastActionSuccess'] = False else: event = self.controller.step( dict(action='Teleport', x=next_state.x, y=next_state.y, z=next_state.z)) s1 = event.metadata['lastActionSuccess'] event = self.controller.step( dict(action='Rotate', rotation=next_state.rotation)) s2 = event.metadata['lastActionSuccess'] event = self.controller.step( dict(action="Look", horizon=next_state.horizon)) s3 = event.metadata['lastActionSuccess'] if not (s1 and s2 and s3): # Go back to previous state. self.teleport_agent_to(curr_state.x, curr_state.y, curr_state.z, curr_state.rotation, curr_state.horizon) self.last_event.metadata['lastActionSuccess'] = False elif action_dict['action'] != 'Done': return self.controller.step(action_dict) def teleport_agent_to(self, x, y, z, rotation, horizon): """ Teleport the agent to (x,y,z) with given rotation and horizon. """ self.controller.step(dict(action='Teleport', x=x, y=y, z=z)) self.controller.step(dict(action='Rotate', rotation=rotation)) self.controller.step(dict(action="Look", horizon=horizon)) def random_reachable_state(self): """ Get a random reachable state. """ xyz = random.choice(self.reachable_points) rotation = random.choice([0, 90, 180, 270]) horizon = random.choice([0, 30, 330]) state = copy.copy(xyz) state['rotation'] = rotation state['horizon'] = horizon return state def randomize_agent_location(self): state = self.random_reachable_state() self.teleport_agent_to(**state) self.start_state = copy.deepcopy(state) return @property def reachable_points(self): """ Use the JSON file to get the reachable points. """ if self.scene_name in self._reachable_points: return self._reachable_points[self.scene_name] points_path = os.path.join(self.offline_data_dir, self.scene_name, "grid.json") if not os.path.exists(points_path): raise IOError("Path {0} does not exist".format(points_path)) self._reachable_points[self.scene_name] = json.load(open(points_path)) return self._reachable_points[self.scene_name]
def live_test(testing_scene, test_objects, shared_model, config, arguments=dict()): f = h5py.File( '/home/chuong/Desktop/Robot/tailong/RL-target-driven-navigation-ai2thor/dumped/' + testing_scene + '.hdf5', "r") states = f['locations'][()] model = shared_model if model is not None: model.eval() # test_object = np.random.choice(test_objects) test_object = 'Desk' env = AI2ThorDumpEnv(testing_scene, test_object, config, arguments) print(arguments['angle']) #--------- controller = Controller() controller.start() controller.reset(testing_scene) drawer = TrajectoryDrawer(controller) new_test_object = None while 1: if new_test_object is not None and new_test_object != test_object: print("Finding {} ..".format(new_test_object)) env = AI2ThorDumpEnv(testing_scene, new_test_object, config, arguments) else: print("Finding {} ..".format(test_object)) state, score, target = env.reset() start = env.current_state_id done = True stop = 0 for step in range(arguments['num_iters']): ob = env.observations[env.current_state_id] cv2.imshow("Live Test", cv2.resize(ob[:, :, ::-1], (400, 400))) new_frame = drawer.draw({ 'x': states[env.current_state_id][0], 'z': states[env.current_state_id][1] }) cv2.imshow( "topview", cv2.cvtColor(new_frame.astype(np.uint8), cv2.COLOR_RGB2BGR)) time.sleep(0.1) k = cv2.waitKey(33) if k == ord('r'): # press q to escape new_test_object_id = int( input("Specify target: {}\n".format( list(zip(range(len(test_objects)), test_objects))))) new_test_object = test_objects[new_test_object_id] break elif k == ord('q'): # press q to escape sys.exit("End live test.") if model is not None: with torch.no_grad(): value, logit = model(state, score, target) prob = F.softmax(logit, dim=-1) action = prob.max(1, keepdim=True)[1].numpy()[0, 0] # action = prob.multinomial(num_samples=1).detach().numpy()[0, 0] else: action = np.random.choice(range(arguments['action_size'])) print("Action: {}".format( ['Move Forward', 'Move Backward', 'Turn Right', 'Turn Left'][action])) state, score, reward, done = env.step(action) if env.collided: print("Collision occurs.") # a quick hack to prevent the agent from stucking # i.e. in test mode an agent can repeat an action ad infinitum if done: stop += 1 if stop == 2: new_test_object_id = int( input("Specify target: {}\n".format( list(zip(range(len(test_objects)), test_objects))))) new_test_object = test_objects[new_test_object_id] stop = 0 break if not done: print("Fail") else: print("Success with {} redundant steps.".format( step + 1 - env.shortest[start, env.current_state_id]))
from ai2thor.controller import Controller c = Controller() c.start() event = c.step(dict(action="MoveAhead")) assert event.frame.shape == (300, 300, 3) print(event.frame.shape) print("Everything works!!!")
class Environment(object): def __init__(self, fov=60.0, camera_Y=0.675, grid_size=0.25, visibility_distance=1.5, player_screen_width=300, player_screen_height=300, full_scrn=False, depth_image=False, class_image=False, top_view_cam=False, object_image=False, third_party_cam=False, scene="FloorPlan220", object_name="Television"): self.scene = scene self.grid_size = grid_size self.depth_image = depth_image self.class_image = class_image self.object_image = object_image self.visibility_distance = visibility_distance self.camera_Y = camera_Y self.fov = fov self.object_name = object_name self.player_screen_width = player_screen_width self.player_screen_height = player_screen_height self.top_view_cam = top_view_cam self.third_party_cam = third_party_cam self.full_scrn = full_scrn self.ctrl = Controller() def make(self): self.ctrl.start()#x_display="50" def reset(self): self.ctrl.reset(self.scene) #self.ctrl.step(dict(action="ToggleMapView")) self.ctrl.step(dict(action="Initialize", gridSize=self.grid_size, renderDepthImage=self.depth_image, renderClassImage=self.class_image, renderObjectImage=self.object_image, visibilityDistance=self.visibility_distance, cameraY=self.camera_Y, fieldOfView=self.fov)) agent_position = np.array(list(self.ctrl.last_event.metadata["agent"]["position"].values())) self.obj = self.ctrl.last_event.metadata["objects"] for obj in self.obj: if self.object_name in obj["name"]: obj_ = obj self.obj_pos = obj["position"] self.visible = obj["visible"] self.obj_agent_dis = obj["distance"] first_person_obs = self.ctrl.last_event.frame return first_person_obs, agent_position, self.object_name, self.obj_pos, self.obj_agent_dis def take_action(self, action): # move right if action == 0: self.ctrl.step(dict(action="MoveRight")) if self.ctrl.last_event.metadata["lastActionSuccess"]: reward = 0 if self.visible: reward = 1 else: reward = -1 # right rotate elif action == 1: event = self.ctrl.step(dict(action='RotateRight')) if self.ctrl.last_event.metadata["lastActionSuccess"]: reward = 0 if self.visible: reward = 1 else: reward = -1 # left rotate elif action == 2: self.ctrl.step(dict(action="RotateLeft")) if self.ctrl.last_event.metadata["lastActionSuccess"]: reward = 0 if self.visible: reward = 1 else: reward = -1 # move left elif action == 3: self.ctrl.step(dict(action='MoveLeft')) if self.ctrl.last_event.metadata["lastActionSuccess"]: reward = 0 if self.visible: reward = 1 else: reward = -1 # move Ahead elif action == 4: self.ctrl.step(dict(action="MoveAhead")) if self.ctrl.last_event.metadata["lastActionSuccess"]: reward = 0 if self.visible: reward = 1 else: reward = -1 # Move back else: # move_back action self.ctrl.step(dict(action="MoveBack")) if self.ctrl.last_event.metadata["lastActionSuccess"]: reward = 0 if self.visible: reward = 1 else: reward = -1 # 1st person camera first_person_obs = self.ctrl.last_event.frame # Third party_cam "From top" third_cam_obs = self.ctrl.last_event.third_party_camera_frames # third_cam_obs = np.squeeze(third_cam_obs, axis=0) # done condition when the last action was successful inverted done = not self.ctrl.last_event.metadata["lastActionSuccess"] # agent position agent_position = np.array(list(self.ctrl.last_event.metadata["agent"]["position"].values())) return first_person_obs, agent_position, done, reward
class Environment(object): def __init__(self, fov=60.0, action_n=6, camera_Y=0.675, grid_size=0.20, visibility_distance=1.5, player_screen_width=300, player_screen_height=300, full_scrn=False, depth_image=False, class_image=False, top_view_cam=False, object_image=False, third_party_cam=False, random_init=False, random_goals=False, scene="FloorPlan220", object_name="Television"): # self.scene = scene self.grid_size = grid_size self.depth_image = depth_image self.class_image = class_image self.object_image = object_image self.visibility_distance = visibility_distance self.camera_Y = camera_Y self.fov = fov self.scene = scene self.object_name = object_name self.player_screen_width = player_screen_width self.player_screen_height = player_screen_height self.top_view_cam = top_view_cam self.third_party_cam = third_party_cam self.full_scrn = full_scrn self.random_init = random_init self.orientations = [0.0, 90.0, 180.0, 270.0, 360.0] self.action_n = action_n self.random_goal = random_goals self.ctrl = Controller() # headless=True def make(self): self.ctrl.start() # x_display="50" self.ctrl.reset(self.scene) def reset(self): random_goal_position = 0 self.ctrl.reset(self.scene) if self.top_view_cam: self.ctrl.step(dict(action="ToggleMapView")) self.ctrl.step( dict(action="Initialize", gridSize=self.grid_size, renderDepthImage=self.depth_image, renderClassImage=self.class_image, renderObjectImage=self.object_image, visibilityDistance=self.visibility_distance, cameraY=self.camera_Y, fieldOfView=self.fov)) if self.random_init: agent_random_spwan = self.agent_random_init() if self.random_goal: random_goal_position = self.random_goal_position() agent_position, agent_rotation, agent_pose = self.agent_properties() # object position, visibility nad distance from agent to specified object obj_position, obj_visibility, obj_agent_distance = self.object_properties( ) #goal = list(obj_position.values()) try: np.array_equal( np.array( list(self.ctrl.last_event.metadata["agent"] ["position"].values())), agent_position) except: print( "agent init position does not equal to agent position attribute" ) first_person_obs = self.ctrl.last_event.frame return first_person_obs, agent_position, random_goal_position, obj_agent_distance, agent_pose, self.object_name def step(self, action, obj_agent_dist): # move right if action == 0: self.ctrl.step(dict(action="MoveRight")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # right rotate elif action == 1: self.ctrl.step(dict(action='RotateRight')) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # left rotate elif action == 2: self.ctrl.step(dict(action="RotateLeft")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # move left elif action == 3: self.ctrl.step(dict(action='MoveLeft')) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # move Ahead elif action == 4: self.ctrl.step(dict(action="MoveAhead")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # Move back elif action == 5: self.ctrl.step(dict(action="MoveBack")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # Crouch elif action == 6: self.ctrl.step(dict(action="Crouch")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # Stand elif action == 7: self.ctrl.step(dict(action="Stand")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # Look up elif action == 8: self.ctrl.step(dict(action="LookUp")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # Look down elif action == 9: self.ctrl.step(dict(action="LookDown")) reward, done, visible, obj_agent_dis_, first_person_obs, collision = self.post_action_state( obj_agent_dist) # agent position agent_position, agent_rotation, agent_pose = self.agent_properties() return first_person_obs, agent_position, done, reward, obj_agent_dis_, visible, agent_pose, collision def agent_properties(self): agent_position = np.array( list(self.ctrl.last_event.metadata["agent"]["position"].values())) agent_rotation = np.array( list(self.ctrl.last_event.metadata["agent"]["rotation"].values())) agent_pose = np.concatenate((agent_position, agent_rotation), axis=0) return agent_position, agent_rotation, agent_pose def get_reachable_position(self): self.ctrl.step(dict(action='GetReachablePositions')) return pd.DataFrame( self.ctrl.last_event.metadata["reachablePositions"]).values def action_sampler(self): return np.random.choice(self.action_n) def object_properties(self): self.obj = self.ctrl.last_event.metadata["objects"] for obj in self.obj: if self.object_name in obj["name"]: goal_object = obj break obj_position = goal_object["position"] obj_visibility = goal_object["visible"] obj_agent_distance = goal_object["distance"] return obj_position, obj_visibility, obj_agent_distance def agent_random_init(self): reachable_positions = self.get_reachable_position() idx = np.random.choice(len(reachable_positions)) angle = np.random.choice(self.orientations) x_pos = reachable_positions[idx][0] y_pos = reachable_positions[idx][1] z_pos = reachable_positions[idx][2] agent_pose = self.ctrl.step( dict(action="Teleport", x=x_pos, y=y_pos, z=z_pos)) return agent_pose def random_goal_position(self): positions = self.get_reachable_position() idx = np.random.choice(len(positions)) position = positions[idx] return position def post_action_state(self, obj_agent_dist): _, visible, obj_agent_dis_ = self.object_properties() first_person_obs = self.ctrl.last_event.frame collide = not self.ctrl.last_event.metadata["lastActionSuccess"] if visible and not collide: reward = 0 done = True elif collide: reward = -1 done = True elif obj_agent_dis_ < obj_agent_dist: reward = -1 + (obj_agent_dist - obj_agent_dis_) done = False else: reward = -1 done = False return reward, done, visible, obj_agent_dis_, first_person_obs, collide