def dump_scene(scene_name,
               base_dir,
               renderObjectImage=False,
               renderDepthImage=False,
               renderClassImage=False):
    """Launch a 448x448 controller on ``scene_name`` and dump it to ``base_dir``.

    The scene is initialised with a 90 degree field of view and a 0.25 grid,
    then handed to ``dump_scene_controller``.

    Args:
        scene_name: Scene passed to ``controller.reset``.
        base_dir: Forwarded unchanged to ``dump_scene_controller``.
        renderObjectImage: Forwarded to the ``Initialize`` action.
        renderDepthImage: Forwarded to the ``Initialize`` action.
        renderClassImage: Forwarded to the ``Initialize`` action.
    """
    controller = ai2thor.controller.Controller()
    controller.start(player_screen_height=448, player_screen_width=448)
    try:
        controller.reset(scene_name)
        controller.step(
            dict(action='Initialize',
                 fieldOfView=90,
                 gridSize=0.25,
                 renderDepthImage=renderDepthImage,
                 renderObjectImage=renderObjectImage,
                 renderClassImage=renderClassImage))
        dump_scene_controller(base_dir, controller)
    finally:
        # Shut the simulator down even when the dump fails, so a crashed run
        # does not leave the started controller behind.
        controller.stop()
def test_simple_example():
    """Run the introductory AI2-THOR example end to end.

    Source: http://ai2thor.allenai.org/tutorials/examples
    """
    thor = ai2thor.controller.Controller()
    thor.start()

    # Scene naming scheme:
    #   Kitchens:     FloorPlan1   - FloorPlan30
    #   Living rooms: FloorPlan201 - FloorPlan230
    #   Bedrooms:     FloorPlan301 - FloorPlan330
    #   Bathrooms:    FloorPlan401 - FloorPlan430
    thor.reset('FloorPlan28')
    thor.step({'action': 'Initialize', 'gridSize': 0.25})
    event = thor.step({'action': 'MoveAhead'})

    # The three attribute reads below only check that the attributes exist.

    # RGB numpy array of the rendered frame.
    event.frame

    # The same frame in BGR channel order, as OpenCV expects.
    event.cv2img

    # Dictionary describing the current state of the scene.
    event.metadata
def determinism_test(all_tests):
    """Replay each recorded test case once and compare the resulting state.

    For every entry of ``all_tests`` the scene is reset, the agent is
    teleported to the recorded start pose, physics auto-simulation is paused,
    and the recorded commands are replayed. A mismatch between the recorded
    and the reproduced state prints diagnostics and drops into ``pdb``.
    """
    for test_id, case in all_tests.items():
        start_location = case["initial_location"]
        start_yaw = case["initial_rotation"]
        commands = case["all_commands"]
        expected_state = case["final_state"]
        start_pose = case["initial_pose"]
        scene = case["scene_name"]

        controller.reset(scene)
        controller.step(
            action="TeleportFull",
            x=start_location["x"],
            y=start_location["y"],
            z=start_location["z"],
            rotation={"x": 0, "y": start_yaw, "z": 0},
            horizon=10,
        )
        controller.step("PausePhysicsAutoSim")

        for command in commands:
            execute_command(controller, command, ADITIONAL_ARM_ARGS)

        observed_state = get_current_full_state(controller)
        if two_dict_equal(expected_state, observed_state):
            print("test {} passed".format(test_id))
            continue

        print("not deterministic")
        print("scene name", controller.last_event.metadata["sceneName"])
        print("initial pose", start_pose)
        print("list of actions", commands)
        # Stop here so the diverging state can be inspected interactively.
        pdb.set_trace()
def determinism_test(all_tests):
    """Replay recorded command sequences and check they reproduce the same state.

    Each value of ``all_tests`` is a dict with the keys ``initial_location``,
    ``initial_rotation``, ``all_commands``, ``final_state``, ``initial_pose``
    and ``scene_name``. Every case is replayed exactly once.

    Args:
        all_tests: Mapping from a test identifier to its recorded test case.
    """
    for k, test_point in all_tests.items():
        initial_location = test_point['initial_location']
        initial_rotation = test_point['initial_rotation']
        all_commands = test_point['all_commands']
        final_state = test_point['final_state']
        initial_pose = test_point['initial_pose']
        scene_name = test_point['scene_name']

        # Restore the recorded starting conditions.
        controller.reset(scene_name)
        controller.step(action='TeleportFull',
                        x=initial_location['x'],
                        y=initial_location['y'],
                        z=initial_location['z'],
                        rotation=dict(x=0, y=initial_rotation, z=0),
                        horizon=10)
        controller.step('PausePhysicsAutoSim')

        for cmd in all_commands:
            execute_command(controller, cmd, ADITIONAL_ARM_ARGS)

        current_state = get_current_full_state(controller)
        if not two_dict_equal(final_state, current_state):
            print('not deterministic')
            print('scene name', controller.last_event.metadata['sceneName'])
            print('initial pose', initial_pose)
            print('list of actions', all_commands)
            # Drop into the debugger so the divergence can be inspected.
            pdb.set_trace()
        else:
            print('test {} passed'.format(k))
def test_rectangle_aspect():
    """A 600x300 player window must produce frames of shape (300, 600, 3)."""
    screen_width, screen_height = 600, 300

    controller = ai2thor.controller.Controller()
    # Bind the file-level ``releases_dir`` function to this instance so it is
    # used in place of the controller's own ``releases_dir``.
    controller.releases_dir = releases_dir.__get__(
        controller, ai2thor.controller.Controller)

    print("trying to start unity")
    controller.start(player_screen_width=screen_width,
                     player_screen_height=screen_height)
    print("started")

    controller.reset('FloorPlan28')
    event = controller.step({'action': 'Initialize', 'gridSize': 0.25})
    assert event.frame.shape == (screen_height, screen_width, 3)
def run(file_name=None):
    """Drive an agent around FloorPlan203 interactively from the keyboard.

    Key bindings:
        a / d  rotate left / right by 22.5 degrees
        w / s  move ahead / back
        z / x  look down / up
        r      prompt for a scene id and load ``FloorPlan<id>``
        q      quit

    After every key press the agent's x/z position and rotation are printed.

    Args:
        file_name: Unused; kept for backward compatibility with callers.
    """
    controller = ai2thor.controller.Controller()
    controller.start()
    try:
        controller.reset("FloorPlan203")
        y_coord = 1.25
        event = controller.step(
            dict(action='Initialize',
                 gridSize=0.5,
                 cameraY=y_coord,
                 visibilityDistance=1.0))

        # Keys that map one-to-one onto a parameterless action.
        simple_actions = {
            'w': 'MoveAhead',
            's': 'MoveBack',
            'z': 'LookDown',
            'x': 'LookUp',
        }
        rotation = 0.0

        while True:
            try:
                key = click.getchar()
            except EOFError:
                # Input stream closed: there is nothing left to read.
                break

            if key == 'q':
                break

            try:
                if key == 'a':
                    # Keep the heading inside [0, 360).
                    rotation = (rotation - 22.5) % 360
                    event = controller.step(
                        dict(action='Rotate', rotation=rotation))
                elif key == 'd':
                    rotation = (rotation + 22.5) % 360
                    event = controller.step(
                        dict(action='Rotate', rotation=rotation))
                elif key in simple_actions:
                    event = controller.step(dict(action=simple_actions[key]))
                elif key == 'r':
                    scene = input("Scene id: ")
                    controller.reset('FloorPlan{}'.format(scene))
                    event = controller.step(
                        dict(action='Initialize',
                             gridSize=0.5,
                             cameraY=y_coord))
                else:
                    print("Key not supported! Try a, d, w, s, z, x, q, r.")

                print((event.metadata['agent']['position']['x'],
                       event.metadata['agent']['position']['z'],
                       event.metadata['agent']['rotation']))
            except Exception as exc:
                # Keep the session alive after a failed action, but report
                # the real cause. KeyboardInterrupt is deliberately not
                # caught so Ctrl-C still ends the loop.
                print("Action failed: {}".format(exc))
    finally:
        # Always shut the simulator down, including on Ctrl-C.
        controller.stop()
def test_calling_complex_actions():
    """Pick up a mug, put it into the microwave and close the microwave.

    Example of interacting with environment internals (picking up, placing
    and opening objects).
    Taken from here: http://ai2thor.allenai.org/tutorials/examples
    """
    controller = ai2thor.controller.Controller()
    controller.start()

    controller.reset('FloorPlan28')
    controller.step(dict(action='Initialize', gridSize=0.25))

    controller.step(dict(action='Teleport', x=-1.25, y=1.00, z=-1.5))
    controller.step(dict(action='LookDown'))
    event = controller.step(dict(action='Rotate', rotation=90))

    # In FloorPlan28, the agent should now be looking at a mug.
    mug_object_id = None
    for obj in event.metadata['objects']:
        if (obj['visible'] and obj['pickupable']
                and obj['objectType'] == 'Mug'):
            event = controller.step(dict(action='PickupObject',
                                         objectId=obj['objectId']),
                                    raise_for_failure=True)
            mug_object_id = obj['objectId']
            break
    # Fail with a readable message instead of a NameError further down.
    assert mug_object_id is not None, "no visible, pickupable Mug found"

    # The agent now has the Mug in its inventory. To put it into the
    # Microwave, the microwave has to be opened first.
    event = controller.step(dict(action='LookUp'))
    receptacle_object_id = None
    for obj in event.metadata['objects']:
        if (obj['visible'] and obj['openable']
                and obj['objectType'] == 'Microwave'):
            event = controller.step(dict(action='OpenObject',
                                         objectId=obj['objectId']),
                                    raise_for_failure=True)
            receptacle_object_id = obj['objectId']
            break
    assert receptacle_object_id is not None, (
        "no visible, openable Microwave found")

    event = controller.step(dict(action='MoveRight'), raise_for_failure=True)
    event = controller.step(dict(action='PutObject',
                                 receptacleObjectId=receptacle_object_id,
                                 objectId=mug_object_id),
                            raise_for_failure=True)

    # Close the microwave.
    event = controller.step(dict(action='CloseObject',
                                 objectId=receptacle_object_id),
                            raise_for_failure=True)
def main():
    """Open FloorPlan1 in a high-quality window and wait for the user.

    The window size comes from the file-level ``player_screen_height`` and
    ``player_screen_width`` values.
    """
    wait = True
    # Set an object's z to this value to teleport it off-screen. Should work
    # for the objects in use; if an object is still visible, decrease it.
    # Only referenced by the disabled teleport calls below.
    offscreen_z = -3

    controller = ai2thor.controller.Controller(quality='High')
    controller.start(player_screen_height=player_screen_height,
                     player_screen_width=player_screen_width)
    controller.reset('FloorPlan1')
    # The parameter is spelled ``gridSize``; the previous lowercase
    # ``gridsize`` did not match that name.
    controller.step(dict(action='Initialize', gridSize=0.25))

    # move unnecessary objects offscreen
    # controller.step(dict(action='TeleportObject', objectId=objectId_dict['apple'], z=offscreen_z))
    # controller.step(dict(action='TeleportObject', objectId=objectId_dict['bread'], z=offscreen_z))
    # controller.step(dict(action='TeleportObject', objectId=objectId_dict['butter_knife'], z=offscreen_z))
    # controller.step(dict(action='TeleportObject', objectId=objectId_dict['butter_knife'], z=offscreen_z))
    # controller.step(dict(action='TeleportFull', x=0, y=1, z=-1.75, rotation=180, horizon=0))

    if wait:
        input("press enter to close...")
def dump_scene(
    scene_name,
    base_dir,
    renderInstanceSegmentation=False,
    renderDepthImage=False,
    renderSemanticSegmentation=False,
):
    """Launch a 448x448 controller on ``scene_name`` and dump it to ``base_dir``.

    The scene is initialised with a 90 degree field of view and a 0.25 grid,
    then handed to ``dump_scene_controller``.

    Args:
        scene_name: Scene passed to ``controller.reset``.
        base_dir: Forwarded unchanged to ``dump_scene_controller``.
        renderInstanceSegmentation: Forwarded to the ``Initialize`` action.
        renderDepthImage: Forwarded to the ``Initialize`` action.
        renderSemanticSegmentation: Forwarded to the ``Initialize`` action.
    """
    controller = ai2thor.controller.Controller()
    controller.start(height=448, width=448)
    try:
        controller.reset(scene_name)
        controller.step(
            dict(
                action="Initialize",
                fieldOfView=90,
                gridSize=0.25,
                renderDepthImage=renderDepthImage,
                renderInstanceSegmentation=renderInstanceSegmentation,
                renderSemanticSegmentation=renderSemanticSegmentation,
            ))
        dump_scene_controller(base_dir, controller)
    finally:
        # Stop the simulator even if resetting or dumping raised.
        controller.stop()
def setup_scene(controller, scene_name):
    """Load a scene, strip its pickupable objects and locate its centre.

    Args:
        controller: Controller used to issue all actions.
        scene_name: Scene passed to ``controller.reset``.

    Returns:
        ``(navigable_points, centre)``: the ``GetReachablePositions`` result
        and a 2-element numpy array with the midpoint of its x and z extents.
    """

    def midpoint(values):
        low, high = min(values), max(values)
        return (high - low) / 2 + low

    controller.reset(scene_name)
    init_event = controller.step(
        {'action': 'Initialize', 'fieldOfView': args.fov})

    # Remove all small objects in the scene (could be filtered differently).
    removable_ids = (item['objectId']
                     for item in init_event.metadata['objects']
                     if item['pickupable'])
    for object_id in removable_ids:
        controller.step({'action': 'RemoveFromScene', 'objectId': object_id})

    # Navigable points of the cleaned-up scene.
    reachable_event = controller.step({'action': 'GetReachablePositions'})
    navigable_points = reachable_event.metadata['actionReturn']

    # (x, z) centre of the scene.
    x_values = [point['x'] for point in navigable_points]
    z_values = [point['z'] for point in navigable_points]
    centre = np.array([midpoint(x_values), midpoint(z_values)])
    return navigable_points, centre
def save_layout(*layouts, wait=False, save_all=False):
    """Render layouts in one shared Unity window and save an image of each.

    Args:
        *layouts: Layouts to save; ignored when ``save_all`` is true.
        wait: Whether to wait for user input before moving on to the next
            layout.
        save_all: Save every layout listed in ``layout_dict`` instead of
            ``layouts``.
    """
    controller = ai2thor.controller.Controller(quality='High')
    controller.start(player_screen_height=800, player_screen_width=1200)
    controller.reset('FloorPlan1')
    # The parameter is spelled ``gridSize``; the previous lowercase
    # ``gridsize`` did not match that name.
    controller.step(dict(action='Initialize', gridSize=0.25))

    selected = list(layout_dict.keys()) if save_all else layouts
    for layout in selected:
        event = view_layout(layout, controller, bool(wait))
        save_img(event, layout)
def check_size(scene):
    """Count the grid positions reachable in ``scene``.

    Starting from the agent's initial position, a breadth-first search
    teleports to each known location and tries every action in
    ``ALL_POSSIBLE_ACTIONS``; each successful action contributes the
    resulting (x, z) position.

    This function no longer opens ``dumped/<scene>.hdf5``. It used to open
    that file in ``"w"`` mode, which truncated any existing file, and then
    never read, wrote or closed it.

    Args:
        scene: Scene passed to ``controller.reset``.

    Returns:
        Number of distinct (x, z) positions discovered.
    """
    from collections import deque

    controller = ai2thor.controller.Controller()
    controller.start()
    try:
        controller.reset(scene)
        controller.random_initialize(unique_object_types=True)
        event = controller.step(dict(action='Initialize', gridSize=0.5))

        agent_position = event.metadata['agent']['position']
        y_coord = agent_position['y']
        start = (agent_position['x'], agent_position['z'])

        # Breadth-first search over reachable positions.
        visited = {start}
        frontier = deque([start])
        while frontier:
            loc = frontier.popleft()
            for act in ALL_POSSIBLE_ACTIONS:
                # Return to ``loc`` before each action so every action is
                # tried from the same place.
                controller.step(
                    dict(action='Teleport', x=loc[0], y=y_coord, z=loc[1]))
                event = controller.step(dict(action=act))
                if not event.metadata['lastActionSuccess']:
                    continue
                position = event.metadata['agent']['position']
                new_loc = (position['x'], position['z'])
                if new_loc not in visited:
                    visited.add(new_loc)
                    frontier.append(new_loc)
    finally:
        controller.stop()

    return len(visited)
def dump_feature(scene, cat2idx):
    """Append a ``dump_features`` dataset to ``dumped/<scene>.hdf5``.

    One feature vector of length ``len(cat2idx) + 4`` is written per row of
    the file's ``locations`` dataset. The last four entries are copied from
    the matching row of ``lasers``; the entry at ``cat2idx[objectType]``
    holds the ``distance`` of each visible object of that type.

    Args:
        scene: Scene name; selects both the simulator scene and the file.
        cat2idx: Mapping from object type name to feature index.
    """
    with h5py.File("dumped/{}.hdf5".format(scene), "a") as f:
        states = f['locations'][()]
        laser = f['lasers'][()]

        dump_features = []
        controller = ai2thor.controller.Controller()
        controller.start()
        try:
            controller.reset(scene)
            controller.step(
                dict(action='Initialize',
                     gridSize=0.5,
                     visibilityDistance=1000.0))

            for i, state in enumerate(states):
                event = controller.step(
                    dict(action='TeleportFull',
                         x=state[0],
                         y=1.25,
                         z=state[1],
                         rotation=state[2],
                         horizon=30))

                df = np.zeros(len(cat2idx) + 4)
                df[-4:] = laser[i].tolist()
                for obj in event.metadata['objects']:
                    if not obj['visible']:
                        continue
                    try:
                        df[cat2idx[obj['objectType']]] = obj['distance']
                    except KeyError:
                        # Object type without a feature slot: report it and
                        # carry on, as before.
                        print(obj['objectType'])
                dump_features.append(df)
        finally:
            # Stop the simulator even if a step failed.
            controller.stop()

        f.create_dataset("dump_features",
                         data=np.asarray(dump_features, np.float32))
def view_layout(layout, controller=None, wait=True):
    """Arrange ``layout`` in FloorPlan1 and return the resulting event.

    Args:
        layout: Layout to view; forwarded to ``arrange``.
        controller: Controller to reuse, or ``None`` to create one here.
            Passing a controller is useful when viewing several layouts in a
            row with a single Unity window.
        wait: Whether to wait for user input before returning.

    Returns:
        The event returned by ``arrange(controller, layout)``.
    """
    if controller is None:
        controller = ai2thor.controller.Controller(quality='High')
        controller.start(player_screen_height=player_screen_height,
                         player_screen_width=player_screen_width)
        controller.reset('FloorPlan1')
        # The parameter is spelled ``gridSize``; the previous lowercase
        # ``gridsize`` did not match that name.
        controller.step(dict(action='Initialize', gridSize=0.25))
    else:
        controller.reset('FloorPlan1')

    # how to create/teleport an object
    # controller.step(
    #     dict(action='CreateObject', objectType='Tomato', randomizeObjectAppearance=False, objectVariation=1))
    # controller.step(dict(action='DropHandObject'))
    # controller.step(dict(action='TeleportObject', objectId='Tomato|1', x=-0.39, y=1.74, z=-0.81))

    event = arrange(controller, layout)

    if wait:
        input("press enter to close...")

    return event
def read_and_process():
    """Keyboard loop for steering the agent through FloorPlan227.

    Keys: ``w``/``a``/``s``/``d`` move, ``h``/``l`` rotate by
    ``rotationCorner``, ``j``/``k`` look down/up, ``r`` saves the current
    frame to ``sense.jpg`` and displays it, ``q`` quits. Other keys are
    ignored.
    """
    event = controller.reset('FloorPlan227')
    event = controller.step({
        'action': 'Initialize',
        'gridSize': 0.25,
        # cameraY=0.75,
        # qualitySetting='MediumCloseFitShadows',
        # renderImage=True,
        'renderDepthImage': True,
        'renderClassImage': True,
        'renderObjectImage': True,
    })

    # Keys that trigger a single parameterless action.
    key_to_action = {
        'w': 'MoveAhead',
        'a': 'MoveLeft',
        's': 'MoveBack',
        'd': 'MoveRight',
        'j': 'LookDown',
        'k': 'LookUp',
    }

    while True:
        pressed = getch()
        if pressed == 'q':
            break

        if pressed in key_to_action:
            event = controller.step({'action': key_to_action[pressed]})
        elif pressed in ('h', 'l'):
            heading = event.metadata['agent']['rotation']['y']
            if pressed == 'h':
                target = heading - rotationCorner
            else:
                target = heading + rotationCorner
            event = controller.step({'action': 'Rotate', 'rotation': target})
        elif pressed == 'r':
            # Write the frame to disk, read it back and show it.
            snapshot = event.cv2image()
            cv2.imwrite('sense.jpg', snapshot)
            reloaded = cv2.imread('sense.jpg')
            cv2.imshow("object detection", reloaded)
            cv2.waitKey()
def test_rectangle_aspect():
    """A 600x300 controller must produce frames of shape (300, 600, 3)."""
    frame_width, frame_height = 600, 300
    thor = UnityTestController(width=frame_width, height=frame_height)
    thor.reset('FloorPlan28')
    result = thor.step({'action': 'Initialize', 'gridSize': 0.25})
    assert result.frame.shape == (frame_height, frame_width, 3)
import time
import numpy as np
from pprint import pprint

# Scene lists. NOTE(review): only ``thor_scene`` is read below;
# ``robot_scene`` is defined but never used in this script.
thor_scene = ["FloorPlan311"]
robot_scene = ["FloorPlan_Train1_1", "FloorPlan_Train1_3"]

# One entry per controller configuration to exercise. ``controller`` holds
# the class itself; it is instantiated inside the loop.
runs = [
    {'id': 'thor', 'port':8200, 'controller': ai2thor.controller.Controller},
    {'id': 'robot', 'port':9200, 'controller': ai2thor.controller.Controller}
]

for run_config in runs:
    id = run_config['id']
    port = run_config['port']
    if id=='thor':
        controller = run_config['controller'](port=port,scene=thor_scene[0], gridSize=0.25, rotateStepDegrees=1)
        # If the scene argument is not specified when using thor, a scene
        # from robot_scene will be used by default.
    else:
        controller = run_config['controller'](port=port, agentMode='bot', agentType='stochastic', gridSize=0.25, applyActionNoise=True, rotateStepDegrees=1)

    for scene in thor_scene:
        print(scene)
        event = controller.reset(scene)  # not strictly necessary
        # Rotate left ten times, one second apart.
        for i in range(10):
            # event = controller.step(dict(action='Initialize', gridSize=0.25, fieldOfView=90, renderObjectImage=True))
            # event = controller.step(dict(action='InitialRandomSpawn', forceVisible=True, maxNumRepeats=10, randomSeed=1))
            # event = controller.step(dict(action='MoveAhead', noise=0.02))
            event = controller.step(dict(action='RotateLeft'))
            # print("event for '{}':".format(run_config['id']))
            # pprint(event.metadata)
            time.sleep(1)
def inference_worker(worker_ind: int, in_queue: mp.Queue, out_queue: mp.Queue,
                     agent_class: Any, agent_kwargs: Dict[str, Any],
                     controller_kwargs: Dict[str, Any], max_steps: int,
                     test: bool):
    """Run queued episodes with one agent and one controller until the queue is empty.

    For every episode taken from ``in_queue`` the scene is reset, the agent
    is teleported to the episode's start pose and then queried for actions
    until it issues ``"Stop"`` or ``max_steps`` is reached. The tuple
    ``(episode id, episode_metrics, episode_result)`` is put on
    ``out_queue``.

    Args:
        worker_ind: Index used only in the final log line.
        in_queue: Queue of episode dicts; the worker exits once a ``get``
            times out after one second.
        out_queue: Queue receiving one result tuple per episode.
        agent_class: Callable building the agent from ``agent_kwargs``.
        agent_kwargs: Keyword arguments for ``agent_class``.
        controller_kwargs: Keyword arguments for the controller.
        max_steps: Maximum number of agent actions per episode.
        test: When true, success is not evaluated and ``episode_result`` is
            ``None``.

    Raises:
        ValueError: If the agent returns an action not in ``ALLOWED_ACTIONS``.
        AssertionError: If ``test`` is false and the target object type is
            not present in the scene metadata.
    """
    agent = agent_class(**agent_kwargs)
    controller = ai2thor.controller.Controller(**controller_kwargs)

    try:
        while True:
            try:
                e = in_queue.get(timeout=1)
            except queue.Empty:
                break

            logger.info(
                "Task Start id:{id} scene:{scene} target_object:{object_type} initial_position:{initial_position} rotation:{initial_orientation}"
                .format(**e))
            controller.initialization_parameters[
                "robothorChallengeEpisodeId"] = e["id"]
            print(e["scene"])
            controller.reset(e["scene"])

            teleport_action = {
                "action": "TeleportFull",
                **e["initial_position"],
                "rotation": {
                    "x": 0,
                    "y": e["initial_orientation"],
                    "z": 0
                },
                "horizon": e["initial_horizon"],
                "standing": True
            }
            # Keep the teleport event so ``event`` is defined even when the
            # step loop below never runs (``max_steps <= 0``).
            event = controller.step(action=teleport_action)

            total_steps = 0
            agent.reset()

            episode_metrics = {
                "trajectory": [{
                    **e["initial_position"],
                    "rotation": float(e["initial_orientation"]),
                    "horizon": e["initial_horizon"]
                }],
                "actions_taken": []
            }

            stopped = False
            while total_steps < max_steps and stopped is False:
                total_steps += 1
                event = controller.last_event
                # NOTE(review): presumably cleared so no simulator state
                # beyond the frames reaches the agent - confirm.
                event.metadata.clear()

                action = agent.act({
                    "object_goal": e["object_type"],
                    "depth": event.depth_frame,
                    "rgb": event.frame
                })

                if action not in ALLOWED_ACTIONS:
                    raise ValueError(
                        "Invalid action: {action}".format(action=action))

                logger.info("Agent action: {action}".format(action=action))
                event = controller.step(action=action)
                episode_metrics["trajectory"].append({
                    **event.metadata["agent"]["position"],
                    "rotation": event.metadata["agent"]["rotation"]["y"],
                    "horizon": event.metadata["agent"]["cameraHorizon"]
                })
                episode_metrics["actions_taken"].append({
                    "action": action,
                    "success": event.metadata["lastActionSuccess"]
                })
                stopped = action == "Stop"

            if test:
                episode_result = None
            else:
                target_obj = get_object_by_type(event.metadata["objects"],
                                                e["object_type"])
                assert target_obj is not None
                target_visible = target_obj["visible"]
                episode_metrics["success"] = stopped and target_visible
                episode_result = {
                    "path": episode_metrics["trajectory"],
                    "shortest_path": e["shortest_path"],
                    "success": episode_metrics["success"]
                }

            out_queue.put((e["id"], episode_metrics, episode_result))
    finally:
        # Release the simulator even when an episode raised.
        controller.stop()

    print(f"Worker {worker_ind} Finished.")
def reset_the_scene_and_get_reachables(scene_name=None):
    """Reset the shared controller and return its reachable positions.

    Args:
        scene_name: Scene to load. When ``None``, one is picked at random
            from ``scene_names``.

    Returns:
        Whatever ``get_reachable_positions(controller)`` returns.
    """
    chosen_scene = (scene_name if scene_name is not None
                    else random.choice(scene_names))
    controller.reset(chosen_scene)
    reachable = get_reachable_positions(controller)
    return reachable
time.sleep(0.1) elif keyboard.is_pressed('down'): event = controller.step(dict(action='LookDown')) time.sleep(0.1) elif keyboard.is_pressed('w'): event = controller.step(dict(action='MoveAhead')) time.sleep(0.05) elif keyboard.is_pressed('s'): event = controller.step(dict(action='MoveBack')) time.sleep(0.05) elif keyboard.is_pressed('d'): time.sleep(0.05) event = controller.step(dict(action='MoveRight')) elif keyboard.is_pressed('a'): time.sleep(0.05) event = controller.step(dict(action='MoveLeft')) elif keyboard.is_pressed('f'): takePicture(event) elif keyboard.is_pressed('c'): time.sleep(0.1) pickUp(event) elif keyboard.is_pressed('v'): event = controller.step(dict(action='DropHandObject')) elif keyboard.is_pressed('q'): time.sleep(0.05) controller.reset('FloorPlan'+ str(random.randint(1, 30))) controller.step(dict(action='Initialize', gridSize=0.25)) controller.step(dict(action = 'InitialRandomSpawn', randomSeed = 0, forceVisible = False, maxNumRepeats = 5)) elif keyboard.is_pressed('esc'): #event.stop() break
def get_points(controller,
               object_type,
               scene,
               objects_types_in_scene,
               failed_points,
               grid_size,
               rotate_by,
               desired_points=30):
    """Sample start poses in ``scene`` for navigating to ``object_type``.

    Reachable positions are sampled at random and thinned so the kept points
    are more than ``2 * grid_size`` apart. For each kept point the shortest
    path to the target object is computed and a random orientation (a
    multiple of ``rotate_by`` degrees) that the simulator accepts is chosen.

    Args:
        controller: Controller used for all simulator queries.
        object_type: Target object type; spaces are removed before querying.
        scene: Scene name, expected to look like ``FloorPlan_<name><n>_<m>``.
        objects_types_in_scene: Set that ``object_type`` is added to when
            exactly one matching object exists.
        failed_points: List that receives a record for every point whose
            processing raised ``ValueError``.
        grid_size: Grid size for ``GetReachablePositions``.
        rotate_by: Step in degrees between candidate orientations.
        desired_points: Up to four times this many positions are sampled
            before thinning.

    Returns:
        The point records sorted by ``shortest_path_length``, or ``None``
        when the scene does not contain exactly one object of the type.
    """
    print("Getting points in scene {}, with target object {}...: ".format(
        scene, object_type))
    controller.reset(scene)
    event = controller.step(
        dict(action='ObjectTypeToObjectIds',
             objectType=object_type.replace(" ", "")))
    object_ids = event.metadata['actionReturn']

    # Exactly one object of the requested type is required.
    if object_ids is None or len(object_ids) != 1:
        print("Object type '{}' not available in scene.".format(object_type))
        return None

    objects_types_in_scene.add(object_type)
    object_id = object_ids[0]

    event_reachable = controller.step(
        dict(action='GetReachablePositions', gridSize=grid_size))

    target_position = controller.step(
        action='GetObjectPosition',
        objectId=object_id).metadata['actionReturn']

    reachable_positions = event_reachable.metadata['actionReturn']
    reachable_pos_set = {(pos['x'], pos['y'], pos['z'])
                         for pos in reachable_positions}

    def filter_points(selected_points, point_set, minimum_distance):
        """Keep selected points, dropping everything near an already kept one."""
        min_sqr_distance = minimum_distance * minimum_distance
        result = set()
        for selected in selected_points:
            if selected in point_set:
                result.add(selected)
                point_set = {
                    p for p in point_set
                    if sqr_dist(p, selected) > min_sqr_distance
                }
        return result

    random.seed()
    # random.sample needs a sequence (sets raise TypeError on Python >= 3.11)
    # and a sample size no larger than the population.
    candidates = list(reachable_pos_set)
    sample_size = min(desired_points * 4, len(candidates))
    points = random.sample(candidates, sample_size)

    final_point_set = filter_points(points, reachable_pos_set, grid_size * 2)

    print("Total number of points: {}".format(len(final_point_set)))
    print("Id {}".format(event.metadata['actionReturn']))

    scene_pattern = re.compile(r'FloorPlan_([a-zA-Z\-]*)([0-9]+)_([0-9]+)')

    point_objects = []
    eps = 0.0001
    counter = 0
    for (x, y, z) in final_point_set:
        possible_orientations = list(range(0, 360, rotate_by))
        pos_unity = dict(x=x, y=y, z=z)
        try:
            path = metrics.get_shortest_path_to_object(
                controller, object_id, pos_unity, {'x': 0, 'y': 0, 'z': 0})
            minimum_path_length = metrics.path_distance(path)

            # Try orientations in random order until one is accepted.
            rotation_allowed = False
            while not rotation_allowed:
                if len(possible_orientations) == 0:
                    break
                rotation_y = random.choice(possible_orientations)
                possible_orientations.remove(rotation_y)
                evt = controller.step(action="TeleportFull",
                                      x=pos_unity['x'],
                                      y=pos_unity['y'],
                                      z=pos_unity['z'],
                                      rotation=dict(x=0, y=rotation_y, z=0))
                rotation_allowed = evt.metadata['lastActionSuccess']
                if not evt.metadata['lastActionSuccess']:
                    print(evt.metadata['errorMessage'])
                    print("--------- Rotation not allowed! for pos {} rot {} ".
                          format(pos_unity, rotation_y))

            if minimum_path_length > eps and rotation_allowed:
                m = scene_pattern.search(scene)
                point_id = "{}_{}_{}_{}_{}".format(m.group(1), m.group(2),
                                                   m.group(3), object_type,
                                                   counter)
                point_objects.append({
                    'id': point_id,
                    'scene': scene,
                    'object_type': object_type,
                    'object_id': object_id,
                    'target_position': target_position,
                    'initial_position': pos_unity,
                    'initial_orientation': rotation_y,
                    'shortest_path': path,
                    'shortest_path_length': minimum_path_length
                })
                counter += 1

        except ValueError:
            print("-----Invalid path discarding point...")
            failed_points.append({
                'scene': scene,
                'object_type': object_type,
                'object_id': object_id,
                'target_position': target_position,
                'initial_position': pos_unity
            })

    sorted_objs = sorted(point_objects,
                         key=lambda m: m['shortest_path_length'])
    return sorted_objs
def test_small_aspect():
    """A 128x64 controller must produce frames of shape (64, 128, 3)."""
    frame_width, frame_height = 128, 64
    thor = UnityTestController(width=frame_width, height=frame_height)
    thor.reset('FloorPlan28')
    result = thor.step({'action': 'Initialize', 'gridSize': 0.25})
    assert result.frame.shape == (frame_height, frame_width, 3)
import ai2thor.controller
import keyboard
import time
import voicecontrol

if __name__ == "__main__":
    # Start up ai2thor and initialize the floor plan.
    controller = ai2thor.controller.Controller()
    controller.start(player_screen_height=666, player_screen_width=666)
    controller.reset('FloorPlan3')  # FloorPlan: 0->30 ; 201->230 ; 301->330 ; 401->430
    controller.step(dict(action='Initialize', gridSize=0.5))
    print('Initialized')

    # Control loop: poll the keyboard and translate held keys into actions.
    # Each action is followed by a 0.3 s pause.
    while True:
        if keyboard.is_pressed('a'):
            event = controller.step(dict(action='MoveLeft'))
            time.sleep(0.3)
        elif keyboard.is_pressed('d'):
            event = controller.step(dict(action='MoveRight'))
            time.sleep(0.3)
        elif keyboard.is_pressed('w'):
            event = controller.step(dict(action='MoveAhead'))
            time.sleep(0.3)
        elif keyboard.is_pressed('s'):
            event = controller.step(dict(action='MoveBack'))
            time.sleep(0.3)
        elif keyboard.is_pressed('up arrow'):
            event = controller.step(dict(action='LookUp'))
            time.sleep(0.3)
bbox_frame = np.array(img) event.bbox_3d_frame = bbox_frame return event if __name__ == "__main__": # give the height and width of the 2D image and scene id w, h = 900, 900 scene = "FloorPlan2{:02d}_physics".format(1) # allocate controller and initialize the scene and agent # local_path = "src/ai2thor/unity/builds/thor-local-OSXIntel64.app/Contents/MacOS/AI2-Thor" local_path = "" controller = ai2thor.controller.Controller(local_path=local_path) _ = controller.start(width=w, height=h) _ = controller.reset(scene) event = controller.step( dict(action='Initialize', gridSize=0.25, renderClassImage=True, renderObjectImage=True, renderDepthImage=True, fieldOfView=90)) # do something then draw the 3D bbox in 2D image event = controller.step(dict(action="MoveAhead")) event = controller.step(dict(action="MoveAhead")) event = controller.step( dict(action="Rotate", rotation=dict(x=0, y=30, z=0))) event = draw_3d_bbox(event) img = Image.fromarray(event.bbox_3d_frame, "RGB")
def start_controller(args, ep1, obj_list, target_parents):
    """Replay a recorded episode and collect one annotated image per step.

    ``ep1[0]`` is the scene, ``ep1[2]`` holds the start pose as
    ``(x, y, z, rotation, horizon)`` and ``ep1[4:]`` is the action sequence.

    Returns:
        List of images produced by ``img_bbx``: one per replayed action plus
        one for the final entry of ``ep1``.
    """
    controller = ai2thor.controller.Controller()
    controller.start(player_screen_height=500, player_screen_width=500)
    controller.reset(ep1[0])

    start_pose = ep1[2]
    event = controller.step({
        'action': 'Initialize',
        'gridSize': 0.25,
        'fieldOfView': 90,
        'renderObjectImage': True,
    })
    event = controller.step({
        'action': 'TeleportFull',
        'x': start_pose[0],
        'y': start_pose[1],
        'z': start_pose[2],
        'rotation': start_pose[3],
        'horizon': start_pose[4],
    })

    def teleport(position, yaw, horizon):
        """Teleport to ``position`` with the given yaw and camera horizon."""
        return controller.step({
            'action': 'TeleportFull',
            'x': position['x'],
            'y': position['y'],
            'z': position['z'],
            'rotation': yaw,
            'horizon': horizon,
        })

    frames = []
    angle = 0  # camera horizon tracked locally, kept within [-60, 60]

    # The last entry of ep1 is handled after the loop.
    for step_index in range(4, len(ep1) - 1):
        action = ep1[step_index]
        print(action)
        time.sleep(0.5)
        img = img_bbx(args, event, action, obj_list, ep1, target_parents)

        pos = event.metadata['agent']['position']
        rot = event.metadata['agent']['rotation']

        if action == 'RotateLeft':
            event = teleport(pos, rot['y'] - 45, angle)
        elif action == 'RotateRight':
            event = teleport(pos, rot['y'] + 45, angle)
        elif action == 'LookDown':
            event = controller.step({'action': action})
            angle = np.clip(angle + 30, -60, 60)
        elif action == 'LookUp':
            event = controller.step({'action': action})
            angle = np.clip(angle - 30, -60, 60)
        else:
            # Re-apply the current pose and horizon before the action.
            event = teleport(pos, rot['y'], angle)
            event = controller.step({'action': action})

        frames.append(img)

    time.sleep(1.5)
    frames.append(
        img_bbx(args, event, ep1[-1], obj_list, ep1, target_parents))
    time.sleep(3)

    controller.stop()
    cv2.destroyAllWindows()
    return frames
import random
import time
import numpy as np
from pprint import pprint

# Scenes ("floor plans") to visit with every configured controller.
fps = ["FloorPlan311"]

# Controllers to exercise. The Unity controller is created right here, on
# port 8200; the robot controller entries are disabled.
runs = [
    {'id': 'unity', 'controller': ai2thor.controller.Controller(port=8200)}
    # {'id': 'robot', 'port': 9200, 'controller': ai2thor.robot_controller.Controller()}
    #{'id': 'robot', 'port': 9000, 'controller': ai2thor.robot_controller.Controller()}
]

for run_config in runs:
    # port = run_config['port']
    controller = run_config['controller']
    # event = controller.start(start_unity=False, host='127.0.0.1', port=port)
    # event = controller.step({'action': 'ChangeQuality', 'quality': 'High'})
    # event = controller.step({"action": "ChangeResolution", "x": 300, "y": 300})

    for fp in fps:
        print(fp)
        for i in range(1):
            event = controller.reset(fp)
            # event = controller.step(dict(action='Initialize', gridSize=0.25, fieldOfView=90, renderObjectImage=True))
            # event = controller.step(dict(action='InitialRandomSpawn', forceVisible=True, maxNumRepeats=10, randomSeed=1))
            # event = controller.step(dict(action='MoveAhead', noise=0.02))
            event = controller.step(dict(action='RotateLeft'))
            # Show the metadata that came back for this controller.
            print("event for '{}':".format(run_config['id']))
            pprint(event.metadata)
            time.sleep(1)
import ai2thor.controller import cv2 import keyboard import yolo def exportFrame(): event = controller.step(dict(action='Initialize', continuous=True)) cv2.imwrite('frame.jpg', event.cv2img) yolo.detect() if __name__ == "__main__": controller = ai2thor.controller.Controller() controller.start(player_screen_height=480, player_screen_width=480) controller.reset('FloorPlan28') while True: try: if keyboard.is_pressed('a'): event = controller.step(dict(action='MoveLeft')) exportFrame() elif keyboard.is_pressed('d'): event = controller.step(dict(action='MoveRight')) exportFrame() elif keyboard.is_pressed('w'): event = controller.step(dict(action='MoveAhead')) exportFrame() elif keyboard.is_pressed('s'): event = controller.step(dict(action='MoveBack')) exportFrame() elif keyboard.is_pressed('right arrow'):
def write_frame(out_path, scene, controller, idx):
    """Capture frames on a 0.25 grid in ``scene`` and pickle them per row.

    For each of the four headings (0, 90, 180, 270) the agent is teleported
    over a grid inside the scene bounds at heights 0.75 and 1.0. Positions
    where the agent does not end up within 0.05 (summed absolute error) of
    the requested point are skipped. Each accepted frame stores its RGB
    image (converted to BGR), depth image and a 4x4 extrinsic matrix.

    Frames are grouped by ``z`` for headings 0/180 and by ``x`` for headings
    90/270. Groups with a ``count`` of at most one are discarded; every
    remaining group is pickled to ``<out_path>/<scene>_<heading>_<key>``.

    Args:
        out_path: Directory receiving the pickle files.
        scene: Scene name passed to ``controller.reset``.
        controller: Controller used for all simulator actions.
        idx: Unused; kept for backward compatibility with callers.
    """
    headings = (0, 90, 180, 270)
    scene_dict = {'name': scene, 0: {}, 90: {}, 180: {}, 270: {}}
    print('Name: ', scene)
    controller.reset(scene=scene)
    event = controller.step(dict(action='Initialize'),
                            renderDepthImage=True,
                            gridSize=0.25)

    corners = np.array(event.metadata['sceneBounds']['cornerPoints'])
    min_x, max_x = round(np.min(corners[:, 0])), round(np.max(corners[:, 0]))
    min_z, max_z = round(np.min(corners[:, 2])), round(np.max(corners[:, 2]))

    for ry in headings:
        for j in np.arange(0.75, 1.21, 0.25):
            for k in np.arange(min_z, max_z + 0.1, 0.25):
                for i in np.arange(min_x, max_x + 0.1, 0.25):
                    # Skip poses within 1.5 of the scene boundary.
                    if ry == 0 and k + 1.5 > max_z:
                        continue
                    if ry == 90 and i + 1.5 > max_x:
                        continue
                    if ry == 180 and k - 1.5 < min_z:
                        continue
                    if ry == 270 and i - 1.5 < min_x:
                        continue
                    if (ry == 0 or ry == 180) and (i > max_x - 1.5
                                                   or i < min_x + 1.5):
                        continue
                    if (ry == 90 or ry == 270) and (k > max_z - 1.5
                                                    or k < min_z + 1.5):
                        continue

                    event = controller.step(action='TeleportFull',
                                            x=i,
                                            y=j,
                                            z=k,
                                            rotation=dict(x=0, y=ry, z=0),
                                            horizon=0.0)
                    position = event.metadata['agent']['position']
                    x, y, z = position['x'], position['y'], position['z']

                    # Reject frames where the agent did not land on the
                    # requested point. The z term used to be ``abs(k - k)``,
                    # which is always 0 and so ignored z errors.
                    if abs(i - x) + abs(j - y) + abs(k - z) > 0.05:
                        continue

                    print(i, j, k, x, y, z, ' --- ', ry, scene)
                    r1 = event.metadata['agent']['r1']
                    r2 = event.metadata['agent']['r2']
                    r3 = event.metadata['agent']['r3']
                    t = event.metadata['agent']['t']

                    image = cv2.cvtColor(event.frame, cv2.COLOR_RGB2BGR)
                    depth = event.depth_frame
                    print(depth.shape)

                    extrinsic = np.zeros((4, 4))
                    for row, axis in enumerate((r1, r2, r3)):
                        extrinsic[row, 0] = axis['x']
                        extrinsic[row, 1] = axis['y']
                        extrinsic[row, 2] = axis['z']
                    extrinsic[0, 3] = t['x'] * 1000
                    # NOTE(review): uses the agent position ``y`` rather than
                    # ``t['y']``, as in the original - confirm intended.
                    extrinsic[1, 3] = y * 1000
                    extrinsic[2, 3] = t['z'] * 1000
                    extrinsic[3, 3] = 1

                    # Group by z for headings 0/180, by x for 90/270.
                    if ry == 0 or ry == 180:
                        group_key, item_key = z, x
                    else:
                        group_key, item_key = x, z

                    group = scene_dict[ry].setdefault(group_key, {'count': 0})
                    if y not in group:
                        group[y] = dict()
                    # NOTE(review): the source's indentation was lost;
                    # ``count`` is assumed to count stored frames, not
                    # distinct heights - confirm.
                    group['count'] += 1
                    group[y][item_key] = {
                        'x': x,
                        'y': y,
                        'z': z,
                        'depth': depth,
                        'image': image,
                        'extrinsic': extrinsic
                    }

    # Drop groups with at most one counted entry. The original loop popped
    # the stale variable ``i`` instead of the collected key, so the
    # intended groups were never removed.
    for ry in headings:
        to_remove = [
            key for key, group in scene_dict[ry].items()
            if group['count'] <= 1
        ]
        for key in to_remove:
            scene_dict[ry].pop(key, None)

    for ry in headings:
        for key, group in scene_dict[ry].items():
            target = os.path.join(out_path,
                                  scene + '_' + str(ry) + '_' + str(key))
            # Close each file deterministically instead of leaking handles.
            with open(target, 'wb') as handle:
                pickle.dump({'rotation': ry, 'data': group, 'name': scene},
                            handle)
def test_stochastic_controller():
    """Resetting a stochastic-agent controller must report a successful action."""
    thor = UnityTestController(agentControllerType='stochastic')
    thor.reset('FloorPlan28')
    last_metadata = thor.last_event.metadata
    assert last_metadata['lastActionSuccess']
def _discover_reachable_locations(controller, start, y_coord):
    """Breadth-first search for every (x, z) position reachable from ``start``."""
    from collections import deque

    visited = set()
    visited.add(start)
    frontier = deque([start])
    while frontier:
        loc = frontier.popleft()
        for act in ALL_POSSIBLE_ACTIONS:
            # Put the agent back on ``loc`` before trying each action.
            controller.step(
                dict(action='Teleport', x=loc[0], y=y_coord, z=loc[1]))
            event = controller.step(dict(action=act))
            if event.metadata['lastActionSuccess']:
                new_loc = (event.metadata['agent']['position']['x'],
                           event.metadata['agent']['position']['z'])
                if new_loc not in visited:
                    visited.add(new_loc)
                    frontier.append(new_loc)
    return list(visited)


def _count_free_steps(graph, sta2idx, states, start, action):
    """Count how often ``action`` can be repeated from ``start`` before hitting -1."""
    steps = 0
    pos = start
    next_idx = graph[sta2idx[pos]][action]
    while next_idx != -1:
        steps += 1
        pos = states[next_idx]
        # Moving ahead/back must never change the heading.
        assert pos[2] == start[2]
        next_idx = graph[sta2idx[pos]][action]
    return steps


def dump(scene, angle, resolution=(300, 300)):
    '''
    Dump needed data to hdf5 file to speed up training.

    The file is written to "dumped/<scene>.hdf5" and can be loaded using
    f = h5py.File(filename, 'r'), where:
    - f['locations'][()]: numpy array of all states in format (x, z, rotation)
    - f['observations'][()]: numpy array of RGB images (resized to `resolution`)
      of the corresponding states in f['locations']
    - f['graph'][()]: integer numpy array representing the transition graph
      between states. Row i holds the index of the state reached from state i
      by action 0 (move forward), 1 (move backward), 2 (rotate right) and
      3 (rotate left); -1 means the action cannot be taken.
    - f['visible_objects'][()]: comma-joined object types visible at the
      corresponding states in f['locations']
    - f['all_visible_objects'][()]: sorted list of every object type seen,
      without 'Lamp', 'PaperTowelRoll' and 'Glassbottle'
    - f['shortest'][()]: numpy array with shape (num_states, num_states) holding
      the shortest path length between the locations of every pair of states.
    - f['lasers'][()]: per state, four free-step counts derived from the graph.

    `angle` is the rotation step in degrees. Every resulting heading must be a
    key of the internal movement table (multiples of 22.5), and headings 0 and
    90 must be among them; otherwise a KeyError is raised.

    Returns the agent's y coordinate reported after initialisation.
    '''
    grid_size = 0.5  # used for both the simulator grid and the graph step

    # The file is opened first so that a missing output directory fails fast;
    # the context manager guarantees it is closed even if a later step raises.
    with h5py.File("dumped/{}.hdf5".format(scene), "w") as f:
        observations = []
        visible_objects = []

        controller = ai2thor.controller.Controller()
        controller.start()
        try:
            controller.reset(scene)
            event = controller.step(
                dict(action='Initialize', gridSize=grid_size, cameraY=1.0,
                     visibilityDistance=1.0))
            y_coord = event.metadata['agent']['position']['y']
            start = (event.metadata['agent']['position']['x'],
                     event.metadata['agent']['position']['z'])

            # Using BFS to discover all reachable positions in current environment.
            all_locs = _discover_reachable_locations(controller, start, y_coord)
            print("{} locations".format(len(all_locs)))

            rotations = np.linspace(0, 360, int(360 // angle) + 1)[:-1].tolist()
            movement = {
                0: [1, 0],
                90.0: [0, 1],
                180.0: [-1, 0],
                270.0: [0, -1],
                22.5: [1, 0],
                67.5: [0, 1],
                45.0: [1, 1],
                112.5: [0, 1],
                135.0: [-1, 1],
                157.5: [-1, 0],
                202.5: [-1, 0],
                225.0: [-1, -1],
                247.5: [0, -1],
                292.5: [0, -1],
                315: [1, -1],
                337.5: [1, 0]
            }

            # Adding rotations
            states = [(loc[0], loc[1], rot) for loc in all_locs for rot in rotations]

            # ------------------------------------------------------------------
            ## Calculate shortest path length array
            sta2idx = dict(zip(states, range(len(states))))
            loc2idx = dict(zip(all_locs, range(len(all_locs))))

            shortest_loc = np.zeros((len(all_locs), len(all_locs)))
            for i in range(len(all_locs)):
                dists = cal_min_dist(i, all_locs, loc2idx)
                for j, d in enumerate(dists):
                    if j != i:
                        shortest_loc[i, j] = d
                        shortest_loc[j, i] = d

            # Distance between two states is the distance between their
            # locations. shortest_loc is symmetric with a zero diagonal, so a
            # single gather reproduces the pairwise fill.
            state_loc_idx = np.array(
                [loc2idx[state[0], state[1]] for state in states], dtype=int)
            shortest_state = shortest_loc[state_loc_idx[:, None],
                                          state_loc_idx[None, :]]

            # ------------------------------------------------------------------
            # Building transition graph
            graph = np.zeros(shape=(len(states), 4), dtype=int)
            for state in states:
                loc = (state[0], state[1])
                rot = state[2]
                move = movement[rot]
                to_states = [
                    # move ahead
                    (loc[0] + move[1] * grid_size, loc[1] + move[0] * grid_size, rot),
                    # move back
                    (loc[0] - move[1] * grid_size, loc[1] - move[0] * grid_size, rot),
                    # turn right
                    (loc[0], loc[1], rot + angle if rot + angle < 360 else 0),
                    # turn left
                    (loc[0], loc[1], rot - angle if rot - angle >= 0 else 360 - angle),
                ]

                state_idx = sta2idx[state]
                for i, new_state in enumerate(to_states):
                    graph[state_idx][i] = sta2idx.get(new_state, -1)

            # ------------------------------------------------------------------
            ## Calculate laser
            laser = {}
            for loc in all_locs:
                north = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 0), 0)
                south = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 0), 1)
                right = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 90), 0)
                left = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 90), 1)

                # Exact diagonals (45, 135, 225, 315) match none of these
                # strict ranges; they are filled in by the block below.
                for r in rotations:
                    if r > 315.0 or r < 45.0:
                        laser[(loc[0], loc[1], r)] = [north, south, right, left]
                    elif r > 45.0 and r < 135.0:
                        laser[(loc[0], loc[1], r)] = [right, left, south, north]
                    elif r > 135.0 and r < 225.0:
                        laser[(loc[0], loc[1], r)] = [south, north, left, right]
                    elif r > 225.0 and r < 315.0:
                        laser[(loc[0], loc[1], r)] = [left, right, north, south]

            if 45.0 in rotations:
                for loc in all_locs:
                    north = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 45.0), 0)
                    south = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 45.0), 1)
                    right = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 135.0), 0)
                    left = _count_free_steps(graph, sta2idx, states, (loc[0], loc[1], 135.0), 1)

                    laser[(loc[0], loc[1], 45.0)] = [north, south, right, left]
                    laser[(loc[0], loc[1], 225.0)] = [south, north, left, right]
                    laser[(loc[0], loc[1], 135.0)] = [right, left, south, north]
                    laser[(loc[0], loc[1], 315.0)] = [left, right, north, south]

            lasers = [laser[state] for state in states]

            # ------------------------------------------------------------------
            # Adding observations
            for state in states:
                # NOTE(review): y is hard-coded to 1.25 here rather than using
                # y_coord -- confirm this is intended.
                event = controller.step(
                    dict(action='TeleportFull', x=state[0], y=1.25, z=state[1],
                         rotation=state[2], horizon=30))

                resized_frame = cv2.resize(event.frame, (resolution[0], resolution[1]))
                observations.append(resized_frame)

                vis_objects = {obj['objectType']
                               for obj in event.metadata['objects'] if obj['visible']}
                # Joining an empty set yields "", the marker for "nothing visible".
                visible_objects.append(",".join(vis_objects))

            # ------------------------------------------------------------------
            print("{} states".format(len(states)))

            excluded = ('Lamp', 'PaperTowelRoll', 'Glassbottle')
            seen_types = set(",".join(o for o in visible_objects if o != '').split(','))
            all_visible_objects = [o for o in sorted(seen_types) if o not in excluded]
        finally:
            # Always shut the simulator down, even when a step above failed.
            controller.stop()

        f.create_dataset("locations", data=np.asarray(states, np.float32))
        f.create_dataset("observations", data=np.asarray(observations, np.uint8))
        f.create_dataset("graph", data=graph)
        f.create_dataset("visible_objects",
                         data=np.array(visible_objects, dtype=object),
                         dtype=h5py.special_dtype(vlen=str))
        f.create_dataset("all_visible_objects",
                         data=np.array(all_visible_objects, dtype=object),
                         dtype=h5py.special_dtype(vlen=str))
        f.create_dataset("shortest", data=shortest_state)
        f.create_dataset("lasers", data=np.asarray(lasers, np.float32))

    return y_coord