def run_recipes(self):
    """Returns different permutations of completing recipes."""
    # Build a STRIPS planning view over the current world and recipe list.
    self.sw = STRIPSWorld(world=self.world, recipes=self.recipes)
    # One path per recipe; each path is an ordered list of subtask actions.
    recipe_paths = self.sw.get_subtasks(
        max_path_length=self.arglist.max_num_subtasks)
    # Flatten all recipe paths into a single subtask list.
    flattened = []
    for path in recipe_paths:
        flattened.extend(path)
    print('Subtasks:', flattened, '\n')
    return flattened
def get_subtasks(self, world):
    """Return different subtask permutations for recipes."""
    self.sw = STRIPSWorld(world, self.recipes)
    # get_subtasks returns [path for recipe 1, path for recipe 2, ...],
    # where each path is a list of actions; flatten into one list.
    paths = self.sw.get_subtasks(
        max_path_length=self.arglist.max_num_subtasks)
    collected = []
    for path in paths:
        collected.extend(path)
    return collected
class OvercookedEnvironment(gym.Env):
    """Environment object for Overcooked.

    Single-agent gym variant: the kitchen is encoded as a flat length-31
    state vector (28 grid cells on a 7*4 grid, then tomato location index,
    plate location index, and a 0/1 chopped flag).
    """

    def __init__(self, arglist):
        # arglist: parsed command-line options (level, num_agents, seed,
        # record, with_image_obs, max_num_timesteps, max_num_subtasks, ...).
        self.arglist = arglist
        # Timestep counter; incremented once per step().
        self.t = 0
        # self.set_filename()

        # Single agent picking one of 4 navigation actions (see step()).
        self.action_space = spaces.MultiDiscrete([4])
        """ Considers a 7*4 gridworld with the following values for states:
        Floor - 0
        Counter - 1
        Cutboard - 2
        Delivery - 3
        Plate - 4
        Tomato - 5
        Agent - 6
        Last 3 states represent:
        Location of tomato
        Location of plate
        State of tomato - 0/1 = Unchopped/Chopped
        """
        # 28 cells with 7 possible values each, then two location indices
        # in [0, 28) and the binary chopped flag.
        obs_space = [7 for x in range(28)] + [28, 28, 2]
        self.observation_space = spaces.MultiDiscrete(obs_space)

        # For visualizing episode.
        self.rep = []

        # For tracking data during an episode.
        self.collisions = []
        self.termination_info = ""
        self.successful = False

    def get_repr(self):
        # Hashable snapshot of the world plus every simulated agent.
        return self.world.get_repr() + tuple(
            [agent.get_repr() for agent in self.sim_agents])

    def __str__(self):
        # Print the world and agents.
        _display = list(
            map(lambda x: ''.join(map(lambda y: y + ' ', x)), self.rep))
        return '\n'.join(_display)

    def __eq__(self, other):
        # Two environments are equal iff their representations match.
        return self.get_repr() == other.get_repr()

    def __copy__(self):
        """Shallow-ish copy: clones world and agents, shares cached distances."""
        new_env = OvercookedEnvironment(self.arglist)
        new_env.__dict__ = self.__dict__.copy()
        new_env.world = copy.copy(self.world)
        new_env.sim_agents = [copy.copy(a) for a in self.sim_agents]
        new_env.distances = self.distances

        # Make sure new objects and new agents' holdings have the right pointers.
        for a in new_env.sim_agents:
            if a.holding is not None:
                a.holding = new_env.world.get_object_at(location=a.location,
                                                        desired_obj=None,
                                                        find_held_objects=True)
        return new_env

    def set_filename(self):
        """Build a run-identifying filename from level/agents/seed and models."""
        self.filename = "{}_agents{}_seed{}".format(self.arglist.level,\
            self.arglist.num_agents, self.arglist.seed)
        model = ""
        if self.arglist.model1 is not None:
            model += "_model1-{}".format(self.arglist.model1)
        if self.arglist.model2 is not None:
            model += "_model2-{}".format(self.arglist.model2)
        if self.arglist.model3 is not None:
            model += "_model3-{}".format(self.arglist.model3)
        if self.arglist.model4 is not None:
            model += "_model4-{}".format(self.arglist.model4)
        self.filename += model

    def load_level(self, level, num_agents):
        """Parse a level file (map / recipes / agent locations) and populate
        the world, recipe list, sim_agents, and the flat state vector.

        NOTE(review): assumes self.world, self.recipes, self.sim_agents and
        self.state were initialized by reset() before this is called.
        """
        x = 0
        y = 0
        # idx is the flat state-vector index of the current grid cell.
        idx = 0
        with open('utils/levels/{}.txt'.format(level), 'r') as file:
            # Mark the phases of reading.
            phase = 1
            for line in file:
                line = line.strip('\n')

                # Blank line separates the phases of the level file.
                if line == '':
                    phase += 1

                # Phase 1: Read in kitchen map.
                elif phase == 1:
                    for x, rep in enumerate(line):
                        # Object, i.e. Tomato, Lettuce, Onion, or Plate.
                        if rep in 'tlop':
                            counter = Counter(location=(x, y))
                            obj = Object(location=(x, y),
                                         contents=RepToClass[rep]())
                            counter.acquire(obj=obj)
                            self.world.insert(obj=counter)
                            self.world.insert(obj=obj)
                            # only plate and tomato are part of state for now
                            if rep == 'p':
                                self.state[idx] = 4
                            elif rep == 't':
                                self.state[idx] = 5
                                # Remember where the tomato started (used by
                                # reward() to detect the first pick-up).
                                self.tomato_start = idx
                        # GridSquare, i.e. Floor, Counter, Cutboard, Delivery.
                        elif rep in RepToClass:
                            newobj = RepToClass[rep]((x, y))
                            if newobj.name == "Counter":
                                self.state[idx] = 1
                            elif newobj.name == "Cutboard":
                                self.state[idx] = 2
                            elif newobj.name == "Delivery":
                                self.state[idx] = 3
                            self.world.objects.setdefault(newobj.name,
                                                          []).append(newobj)
                        else:
                            # Empty. Set a Floor tile.
                            f = Floor(location=(x, y))
                            self.world.objects.setdefault('Floor', []).append(f)
                        idx += 1
                    y += 1

                # Phase 2: Read in recipe list.
                elif phase == 2:
                    # Recipe class is looked up by name in this module.
                    self.recipes.append(globals()[line]())

                # Phase 3: Read in agent locations (up to num_agents).
                elif phase == 3:
                    if len(self.sim_agents) < num_agents:
                        loc = line.split(' ')
                        sim_agent = SimAgent(
                            name='agent-' + str(len(self.sim_agents) + 1),
                            id_color=COLORS[len(self.sim_agents)],
                            location=(int(loc[0]), int(loc[1])))
                        self.sim_agents.append(sim_agent)
                        # Mark the agent's cell in the state vector.
                        idx = (int(loc[1])) * (x + 1) + int(loc[0])
                        self.state[idx] = 6

        self.distances = {}
        # x/y hold the last map column/row seen, so width = x+1, height = y.
        self.world.width = x + 1
        self.world.height = y
        self.world.perimeter = 2 * (self.world.width + self.world.height)

    # Get index of location in state vector
    def get_state_idx(self, obj):
        # Row-major flattening: row * width + column.
        return obj.location[1] * self.world.width + obj.location[0]

    # Update the state based on current world
    def update_state(self):
        """Refresh grid cells and the three trailing entries (tomato loc,
        plate loc, chopped flag) of the state vector from the world."""
        chopped = 0
        tomato_loc = 0
        plate_loc = 0

        # Find out if tomatoes are chopped. Works only when not plated
        for subtask in self.all_subtasks:
            if isinstance(subtask, recipe.Chop):
                _, goal_obj = nav_utils.get_subtask_obj(subtask)
                goal_obj_locs = self.world.get_all_object_locs(obj=goal_obj)
                if goal_obj_locs:
                    chopped = 1

        # NOTE(review): raises KeyError if the level contains no tomato;
        # presumably every level for this env has one — confirm.
        for obj in self.world.objects["Tomato"]:
            if obj.location:
                tomato_loc = self.get_state_idx(obj)

        # Flatten all world objects into one list before scanning.
        objs = []
        for o in self.world.objects.values():
            objs += o

        for obj in objs:
            idx = obj.location[1] * self.world.width + obj.location[0]
            if obj.name == "Counter":
                self.state[idx] = 1
            elif obj.name == "Cutboard":
                self.state[idx] = 2
            elif obj.name == "Delivery":
                self.state[idx] = 3
            elif obj.name == "Plate":
                self.state[idx] = 4
            elif obj.name == "Plate-Tomato":
                # A plated tomato: tomato and plate share a cell and the
                # tomato is necessarily chopped.
                tomato_loc = obj.location[1] * self.world.width + obj.location[
                    0]
                plate_loc = obj.location[1] * self.world.width + obj.location[0]
                chopped = 1

        # Trailing entries of the 31-long state vector (after 28 grid cells).
        self.state[28] = tomato_loc
        self.state[29] = plate_loc
        self.state[30] = chopped

    def reset(self):
        """Standard gym reset: rebuild world, recipes, agents, subtasks and
        distance cache; returns the flat state vector."""
        self.world = World(arglist=self.arglist)
        self.recipes = []
        self.sim_agents = []
        self.agent_actions = {}
        self.t = 0

        # For visualizing episode.
        self.rep = []

        # For tracking data during an episode.
        self.collisions = []
        self.termination_info = ""
        self.successful = False

        # State for network
        self.state = np.zeros(31)
        # Whether the tomato has been picked up at least once (reward bonus).
        self.pick = False

        # Load world & distances.
        self.load_level(level=self.arglist.level,
                        num_agents=self.arglist.num_agents)
        self.all_subtasks = self.run_recipes()
        # Keep track of subtask completion
        self.subtask_status = np.zeros(len(self.all_subtasks))
        self.world.make_loc_to_gridsquare()
        self.world.make_reachability_graph()
        self.cache_distances()
        self.obs_tm1 = copy.copy(self)

        if self.arglist.record or self.arglist.with_image_obs:
            self.game = GameImage(filename="test",
                                  world=self.world,
                                  sim_agents=self.sim_agents,
                                  record=self.arglist.record)
            self.game.on_init()
            if self.arglist.record:
                self.game.save_image_obs(self.t)

        return self.state

    def close(self):
        return

    def step(self, action):
        """Advance one timestep for the single controlled agent.

        action: 1-element sequence indexing into NAV_ACTIONS.
        Returns (state_vector, reward, done, info).
        """
        # Track internal environment info.
        self.t += 1
        print("===============================")
        print("[environment.step] @ TIMESTEP {}".format(self.t))
        print("===============================")

        # Only single agent action
        NAV_ACTIONS = [(0, 1), (0, -1), (-1, 0), (1, 0)]
        action = NAV_ACTIONS[action[0]]

        # Remember the agent's cell before moving so it can be cleared.
        sim_loc = self.sim_agents[0].location
        sim_state_idx = sim_loc[1] * self.world.width + sim_loc[0]

        # Get actions.
        for sim_agent in self.sim_agents:
            sim_agent.action = action

        # Check collisions.
        self.check_collisions()
        self.obs_tm1 = copy.copy(self)

        # Execute.
        self.execute_navigation()

        # Move the agent marker in the state vector to its new cell.
        sim_loc = self.sim_agents[0].location
        sim_state_new_idx = sim_loc[1] * self.world.width + sim_loc[0]
        self.state[sim_state_idx] = 0
        self.state[sim_state_new_idx] = 6
        self.update_state()

        # Visualize.
        self.display()
        self.print_agents()
        if self.arglist.record:
            self.game.save_image_obs(self.t)
        # print(self.state)

        # Get a plan-representation observation.
        new_obs = copy.copy(self)
        # Get an image observation
        # NOTE(review): self.game only exists when record/with_image_obs was
        # set in reset() — this line presumably assumes one of them is on.
        image_obs = self.game.get_image_obs()

        done = self.done()
        reward = self.reward()
        info = {
            "t": self.t,
            "obs": self.state,
            "image_obs": image_obs,
            "done": done,
            "termination_info": self.termination_info
        }
        return self.state, reward, done, info

    def done(self):
        """Return True when the episode is over (timeout or all deliveries
        complete); also sets termination_info and successful."""
        # Done if the episode maxes out
        if self.t >= self.arglist.max_num_timesteps and self.arglist.max_num_timesteps:
            self.termination_info = "Terminating because passed {} timesteps".format(
                self.arglist.max_num_timesteps)
            self.successful = False
            return True

        assert any([
            isinstance(subtask, recipe.Deliver) for subtask in self.all_subtasks
        ]), "no delivery subtask"

        # Done if subtask is completed.
        for subtask in self.all_subtasks:
            # Double check all goal_objs are at Delivery.
            if isinstance(subtask, recipe.Deliver):
                _, goal_obj = nav_utils.get_subtask_obj(subtask)
                delivery_loc = list(
                    filter(lambda o: o.name == 'Delivery',
                           self.world.get_object_list()))[0].location
                goal_obj_locs = self.world.get_all_object_locs(obj=goal_obj)
                if not any([gol == delivery_loc for gol in goal_obj_locs]):
                    self.termination_info = ""
                    self.successful = False
                    return False

        self.termination_info = "Terminating because all deliveries were completed"
        self.successful = True
        return True

    def reward(self):
        """Shaped reward: 150 for delivery, 20 for completing any other
        subtask, a one-off 20 for first moving the tomato, else -0.01."""
        reward = 0
        for idx, subtask in enumerate(self.all_subtasks):
            # Check if subtask is complete
            if not self.subtask_status[idx]:
                _, goal_obj = nav_utils.get_subtask_obj(subtask)
                goal_obj_locs = self.world.get_all_object_locs(obj=goal_obj)
                if isinstance(subtask, recipe.Deliver):
                    # If task is delivery then give max reward
                    delivery_loc = list(filter(lambda o: o.name=='Delivery',\
                        self.world.get_object_list()))[0].location
                    if goal_obj_locs:
                        if all([gol == delivery_loc for gol in goal_obj_locs]):
                            reward = 150
                else:
                    # for completion of any other subtasks give small rewards
                    if goal_obj_locs:
                        reward = 20
                        self.subtask_status[idx] = 1

        # One-time bonus when the tomato first leaves its starting cell.
        # NOTE(review): `not a == b` reads better as `a != b`.
        if not self.tomato_start == self.state[28] and not self.pick:
            reward = 20
            self.pick = True

        # Small step penalty when nothing rewarding happened.
        if reward == 0:
            reward = -0.01
        print("Reward for step: {}".format(reward))
        return reward

    def print_agents(self):
        for sim_agent in self.sim_agents:
            sim_agent.print_status()

    def display(self):
        """Print the 28-cell state grid and the ASCII world rendering."""
        # Print states for checking
        state_print = np.copy(self.state)
        # First 28 entries are the grid cells of the 7-wide, 4-tall map.
        state_print_map = state_print[:28].reshape((4, 7))
        print(state_print_map)
        self.update_display()
        print(str(self))

    def update_display(self):
        # Rebuild the ASCII representation, then overlay agent glyphs.
        self.rep = self.world.update_display()
        for agent in self.sim_agents:
            x, y = agent.location
            self.rep[y][x] = str(agent)

    def get_agent_names(self):
        return [agent.name for agent in self.sim_agents]

    def run_recipes(self):
        """Returns different permutations of completing recipes."""
        self.sw = STRIPSWorld(world=self.world, recipes=self.recipes)
        # [path for recipe 1, path for recipe 2, ...] where each path is a list of actions
        subtasks = self.sw.get_subtasks(
            max_path_length=self.arglist.max_num_subtasks)
        all_subtasks = [subtask for path in subtasks for subtask in path]
        print('Subtasks:', all_subtasks, '\n')
        return all_subtasks

    def is_collision(self, agent1_loc, agent2_loc, agent1_action,
                     agent2_action):
        """Returns whether agents are colliding. Collisions happens if agent
        collide amongst themselves or with world objects.

        Returns a 2-element list: execute[i] is False when agent i's action
        must be cancelled.
        """
        # Tracks whether agents can execute their action.
        execute = [True, True]

        # Collision between agents and world objects.
        agent1_next_loc = tuple(
            np.asarray(agent1_loc) + np.asarray(agent1_action))
        if self.world.get_gridsquare_at(location=agent1_next_loc).collidable:
            # Revert back because agent collided.
            agent1_next_loc = agent1_loc

        agent2_next_loc = tuple(
            np.asarray(agent2_loc) + np.asarray(agent2_action))
        if self.world.get_gridsquare_at(location=agent2_next_loc).collidable:
            # Revert back because agent collided.
            agent2_next_loc = agent2_loc

        # Inter-agent collision.
        if agent1_next_loc == agent2_next_loc:
            # If one agent is standing still (blocked), the other yields.
            if agent1_next_loc == agent1_loc and agent1_action != (0, 0):
                execute[1] = False
            elif agent2_next_loc == agent2_loc and agent2_action != (0, 0):
                execute[0] = False
            else:
                execute[0] = False
                execute[1] = False

        # Prevent agents from swapping places.
        elif ((agent1_loc == agent2_next_loc)
              and (agent2_loc == agent1_next_loc)):
            execute[0] = False
            execute[1] = False
        return execute

    def check_collisions(self):
        """Checks for collisions and corrects agents' executable actions.
        Collisions can either happen amongst agents or between agents and
        world objects."""
        execute = [True for _ in self.sim_agents]

        # Check each pairwise collision between agents.
        for i, j in combinations(range(len(self.sim_agents)), 2):
            agent_i, agent_j = self.sim_agents[i], self.sim_agents[j]
            exec_ = self.is_collision(agent1_loc=agent_i.location,
                                      agent2_loc=agent_j.location,
                                      agent1_action=agent_i.action,
                                      agent2_action=agent_j.action)

            # Update exec array and set path to do nothing.
            if not exec_[0]:
                execute[i] = False
            if not exec_[1]:
                execute[j] = False

            # Track collisions.
            if not all(exec_):
                collision = CollisionRepr(
                    time=self.t,
                    agent_names=[agent_i.name, agent_j.name],
                    agent_locations=[agent_i.location, agent_j.location])
                self.collisions.append(collision)

        print('\nexecute array is:', execute)

        # Update agents' actions if collision was detected.
        for i, agent in enumerate(self.sim_agents):
            if not execute[i]:
                agent.action = (0, 0)
            print("{} has action {}".format(color(agent.name, agent.color),
                                            agent.action))

    def execute_navigation(self):
        # Apply each agent's action to the world and log it.
        for agent in self.sim_agents:
            interact(agent=agent, world=self.world)
            self.agent_actions[agent.name] = agent.action

    def cache_distances(self):
        """Saving distances between world objects."""
        counter_grid_names = [
            name for name in self.world.objects
            if "Supply" in name or "Counter" in name or "Delivery" in name
            or "Cut" in name
        ]

        # Getting all source objects.
        source_objs = copy.copy(self.world.objects["Floor"])
        for name in counter_grid_names:
            source_objs += copy.copy(self.world.objects[name])

        # Getting all destination objects.
        dest_objs = source_objs

        # From every source (Counter and Floor objects),
        # calculate distance to other nodes.
        for source in source_objs:
            self.distances[source.location] = {}
            # Source to source distance is 0.
            self.distances[source.location][source.location] = 0
            for destination in dest_objs:
                # Possible edges to approach source and destination.
                # Collidable squares are approached from adjacent cells.
                source_edges = [
                    (0, 0)
                ] if not source.collidable else World.NAV_ACTIONS
                destination_edges = [
                    (0, 0)
                ] if not destination.collidable else World.NAV_ACTIONS

                # Maintain shortest distance.
                shortest_dist = np.inf
                for source_edge, dest_edge in product(source_edges,
                                                      destination_edges):
                    # NOTE(review): bare except silently treats any error as
                    # "unreachable"; presumably it targets
                    # networkx.NetworkXNoPath / NodeNotFound — confirm.
                    try:
                        dist = nx.shortest_path_length(
                            self.world.reachability_graph,
                            (source.location, source_edge),
                            (destination.location, dest_edge))
                        # Update shortest distance.
                        if dist < shortest_dist:
                            shortest_dist = dist
                    except:
                        continue

                # Cache distance floor -> counter.
                self.distances[source.location][
                    destination.location] = shortest_dist

        # Save all distances under world as well.
        self.world.distances = self.distances
class OvercookedEnvironment(gym.Env):
    """Environment object for Overcooked.

    Multi-agent variant: step() takes a per-agent action dict and the
    observation is a copy of the environment itself (or an image when
    with_image_obs is set).
    """

    def __init__(self, arglist):
        # arglist: parsed command-line options (level, num_agents, seed,
        # model1..model4, record, with_image_obs, max_num_timesteps, ...).
        self.arglist = arglist
        # Timestep counter; incremented once per step().
        self.t = 0
        self.set_filename()

        # For visualizing episode.
        self.rep = []

        # For tracking data during an episode.
        self.collisions = []
        self.termination_info = ""
        self.successful = False

    def get_repr(self):
        # Hashable snapshot of the world plus every simulated agent.
        return self.world.get_repr() + tuple([agent.get_repr() for agent in self.sim_agents])

    def __str__(self):
        # Print the world and agents.
        _display = list(map(lambda x: ''.join(map(lambda y: y + ' ', x)), self.rep))
        return '\n'.join(_display)

    def __eq__(self, other):
        # Two environments are equal iff their representations match.
        return self.get_repr() == other.get_repr()

    def __copy__(self):
        """Shallow-ish copy: clones world and agents, shares cached distances."""
        new_env = OvercookedEnvironment(self.arglist)
        new_env.__dict__ = self.__dict__.copy()
        new_env.world = copy.copy(self.world)
        new_env.sim_agents = [copy.copy(a) for a in self.sim_agents]
        new_env.distances = self.distances

        # Make sure new objects and new agents' holdings have the right pointers.
        for a in new_env.sim_agents:
            if a.holding is not None:
                a.holding = new_env.world.get_object_at(
                    location=a.location,
                    desired_obj=None,
                    find_held_objects=True)
        return new_env

    def set_filename(self):
        """Build a run-identifying filename from level/agents/seed and models."""
        self.filename = "{}_agents{}_seed{}".format(self.arglist.level,\
            self.arglist.num_agents, self.arglist.seed)
        model = ""
        if self.arglist.model1 is not None:
            model += "_model1-{}".format(self.arglist.model1)
        if self.arglist.model2 is not None:
            model += "_model2-{}".format(self.arglist.model2)
        if self.arglist.model3 is not None:
            model += "_model3-{}".format(self.arglist.model3)
        if self.arglist.model4 is not None:
            model += "_model4-{}".format(self.arglist.model4)
        self.filename += model

    def load_level(self, level, num_agents):
        """Parse a level file (map / recipes / agent locations) and populate
        the world, recipe list and sim_agents.

        NOTE(review): assumes self.world, self.recipes and self.sim_agents
        were initialized by reset() before this is called.
        """
        x = 0
        y = 0
        with open('utils/levels/{}.txt'.format(level), 'r') as file:
            # Mark the phases of reading.
            phase = 1
            for line in file:
                line = line.strip('\n')

                # Blank line separates the phases of the level file.
                if line == '':
                    phase += 1

                # Phase 1: Read in kitchen map.
                elif phase == 1:
                    for x, rep in enumerate(line):
                        # Object, i.e. Tomato, Lettuce, Onion, or Plate.
                        if rep in 'tlopz':
                            counter = Counter(location=(x, y))
                            obj = Object(
                                location=(x, y),
                                contents=RepToClass[rep]())
                            counter.acquire(obj=obj)
                            self.world.insert(obj=counter)
                            self.world.insert(obj=obj)
                        # GridSquare, i.e. Floor, Counter, Cutboard, Delivery.
                        elif rep in RepToClass:
                            newobj = RepToClass[rep]((x, y))
                            self.world.objects.setdefault(newobj.name, []).append(newobj)
                        else:
                            # Empty. Set a Floor tile.
                            f = Floor(location=(x, y))
                            self.world.objects.setdefault('Floor', []).append(f)
                    y += 1

                # Phase 2: Read in recipe list.
                elif phase == 2:
                    # Recipe class is looked up by name in this module.
                    self.recipes.append(globals()[line]())

                # Phase 3: Read in agent locations (up to num_agents).
                elif phase == 3:
                    if len(self.sim_agents) < num_agents:
                        loc = line.split(' ')
                        sim_agent = SimAgent(
                            name='agent-'+str(len(self.sim_agents)+1),
                            id_color=COLORS[len(self.sim_agents)],
                            location=(int(loc[0]), int(loc[1])))
                        self.sim_agents.append(sim_agent)

        self.distances = {}
        # x/y hold the last map column/row seen, so width = x+1, height = y.
        self.world.width = x+1
        self.world.height = y
        self.world.perimeter = 2*(self.world.width + self.world.height)

    def reset(self):
        """Standard gym reset: rebuild world, recipes, agents, subtasks and
        distance cache; returns a copy of the environment as observation."""
        self.world = World(arglist=self.arglist)
        self.recipes = []
        self.sim_agents = []
        self.agent_actions = {}
        self.t = 0

        # For visualizing episode.
        self.rep = []

        # For tracking data during an episode.
        self.collisions = []
        self.termination_info = ""
        self.successful = False

        # Load world & distances.
        self.load_level(
            level=self.arglist.level,
            num_agents=self.arglist.num_agents)
        self.all_subtasks = self.run_recipes()
        self.world.make_loc_to_gridsquare()
        self.world.make_reachability_graph()
        self.cache_distances()
        self.obs_tm1 = copy.copy(self)

        if self.arglist.record or self.arglist.with_image_obs:
            self.game = GameImage(
                filename=self.filename,
                world=self.world,
                sim_agents=self.sim_agents,
                record=self.arglist.record)
            self.game.on_init()
            if self.arglist.record:
                self.game.save_image_obs(self.t)

        return copy.copy(self)

    def close(self):
        return

    def step(self, action_dict):
        """Advance one timestep.

        action_dict: mapping from agent name to that agent's action.
        Returns (observation, reward, done, info) where observation is a
        copy of the env (or an image when with_image_obs is set).
        """
        # Track internal environment info.
        self.t += 1
        print("===============================")
        print("[environment.step] @ TIMESTEP {}".format(self.t))
        print("===============================")

        # Get actions.
        for sim_agent in self.sim_agents:
            sim_agent.action = action_dict[sim_agent.name]

        # Check collisions.
        self.check_collisions()
        self.obs_tm1 = copy.copy(self)

        # Execute.
        self.execute_navigation()

        # Visualize.
        self.display()
        self.print_agents()
        if self.arglist.record:
            self.game.save_image_obs(self.t)

        new_obs = copy.copy(self)
        # Getting an image observation
        if self.arglist.with_image_obs:
            new_obs = self.game.get_image_obs()

        done = self.done()
        reward = self.reward()
        info = {"t": self.t, "obs": new_obs,
                "done": done, "termination_info": self.termination_info}
        return new_obs, reward, done, info

    def done(self):
        """Return True when the episode is over (timeout or all deliveries
        complete); also sets termination_info and successful."""
        # Done if the episode maxes out
        if self.t >= self.arglist.max_num_timesteps and self.arglist.max_num_timesteps:
            self.termination_info = "Terminating because passed {} timesteps".format(
                self.arglist.max_num_timesteps)
            self.successful = False
            return True

        assert any([isinstance(subtask, recipe.Deliver) for subtask in self.all_subtasks]), "no delivery subtask"

        # Done if subtask is completed.
        for subtask in self.all_subtasks:
            # Double check all goal_objs are at Delivery.
            if isinstance(subtask, recipe.Deliver):
                _, goal_obj = nav_utils.get_subtask_obj(subtask)
                delivery_loc = list(filter(lambda o: o.name=='Delivery', self.world.get_object_list()))[0].location
                goal_obj_locs = self.world.get_all_object_locs(obj=goal_obj)
                if not any([gol == delivery_loc for gol in goal_obj_locs]):
                    self.termination_info = ""
                    self.successful = False
                    return False

        self.termination_info = "Terminating because all deliveries were completed"
        self.successful = True
        return True

    def reward(self):
        # Sparse reward: 1 only once done() has marked the episode successful.
        return 1 if self.successful else 0

    def print_agents(self):
        for sim_agent in self.sim_agents:
            sim_agent.print_status()

    def display(self):
        self.update_display()
        print(str(self))

    def update_display(self):
        # Rebuild the ASCII representation, then overlay agent glyphs.
        self.rep = self.world.update_display()
        for agent in self.sim_agents:
            x, y = agent.location
            self.rep[y][x] = str(agent)

    def get_agent_names(self):
        return [agent.name for agent in self.sim_agents]

    def run_recipes(self):
        """Returns different permutations of completing recipes."""
        self.sw = STRIPSWorld(world=self.world, recipes=self.recipes)
        # [path for recipe 1, path for recipe 2, ...] where each path is a list of actions
        subtasks = self.sw.get_subtasks(max_path_length=self.arglist.max_num_subtasks)
        all_subtasks = [subtask for path in subtasks for subtask in path]
        print('Subtasks:', all_subtasks, '\n')
        return all_subtasks

    def get_AB_locs_given_objs(self, subtask, subtask_agent_names, start_obj,
                               goal_obj, subtask_action_obj):
        """Returns list of locations relevant for subtask's Merge operator.

        See Merge operator formalism in our paper, under Fig. 11:
        https://arxiv.org/pdf/2003.11778.pdf

        Returns (A_locs, B_locs): candidate locations for the two operands
        of the Merge; ([], []) for unrecognized subtask types.
        """
        # For Merge operator on Chop subtasks, we look at objects that can be
        # chopped and the cutting board objects.
        if isinstance(subtask, recipe.Chop):
            # A: Object that can be chopped (on the map or held by one of
            # the subtask's agents).
            A_locs = self.world.get_object_locs(obj=start_obj, is_held=False) + list(map(lambda a: a.location,\
                list(filter(lambda a: a.name in subtask_agent_names and a.holding == start_obj, self.sim_agents))))

            # B: Cutboard objects.
            B_locs = self.world.get_all_object_locs(obj=subtask_action_obj)

        # For Merge operator on Deliver subtasks, we look at objects that can be
        # delivered and the Delivery object.
        elif isinstance(subtask, recipe.Deliver):
            # B: Delivery objects.
            B_locs = self.world.get_all_object_locs(obj=subtask_action_obj)

            # A: Object that can be delivered.
            A_locs = self.world.get_object_locs(
                obj=start_obj, is_held=False) + list(
                    map(lambda a: a.location, list(
                        filter(lambda a: a.name in subtask_agent_names and a.holding == start_obj, self.sim_agents))))
            # Exclude objects already sitting on a Delivery square.
            A_locs = list(filter(lambda a: a not in B_locs, A_locs))

        # For Merge operator on Merge subtasks, we look at objects that can be
        # combined together. These objects are all ingredient objects (e.g. Tomato, Lettuce).
        elif isinstance(subtask, recipe.Merge):
            # start_obj is a pair here: one operand per side of the merge.
            A_locs = self.world.get_object_locs(
                obj=start_obj[0], is_held=False) + list(
                    map(lambda a: a.location, list(
                        filter(lambda a: a.name in subtask_agent_names and a.holding == start_obj[0], self.sim_agents))))
            B_locs = self.world.get_object_locs(
                obj=start_obj[1], is_held=False) + list(
                    map(lambda a: a.location, list(
                        filter(lambda a: a.name in subtask_agent_names and a.holding == start_obj[1], self.sim_agents))))

        else:
            return [], []

        return A_locs, B_locs

    def get_lower_bound_for_subtask_given_objs(
            self, subtask, subtask_agent_names, start_obj, goal_obj,
            subtask_action_obj):
        """Return the lower bound distance (shortest path) under this subtask
        between objects."""
        # NOTE(review): `len(agents)` in the message below is a NameError if
        # the assert ever fires — should be len(subtask_agent_names).
        assert len(subtask_agent_names) <= 2, 'passed in {} agents but can only do 1 or 2'.format(len(agents))

        # Calculate extra holding penalty if the object is irrelevant.
        holding_penalty = 0.0
        for agent in self.sim_agents:
            if agent.name in subtask_agent_names:
                # Check for whether the agent is holding something.
                if agent.holding is not None:
                    # Merge subtasks may legitimately involve held objects.
                    if isinstance(subtask, recipe.Merge):
                        continue
                    else:
                        if agent.holding != start_obj and agent.holding != goal_obj:
                            # Add one "distance"-unit cost
                            holding_penalty += 1.0
        # Account for two-agents where we DON'T want to overpenalize.
        holding_penalty = min(holding_penalty, 1)

        # Get current agent locations.
        agent_locs = [agent.location for agent in list(filter(lambda a: a.name in subtask_agent_names, self.sim_agents))]
        A_locs, B_locs = self.get_AB_locs_given_objs(
            subtask=subtask,
            subtask_agent_names=subtask_agent_names,
            start_obj=start_obj,
            goal_obj=goal_obj,
            subtask_action_obj=subtask_action_obj)

        # Add together distance and holding_penalty.
        return self.world.get_lower_bound_between(
            subtask=subtask,
            agent_locs=tuple(agent_locs),
            A_locs=tuple(A_locs),
            B_locs=tuple(B_locs)) + holding_penalty

    def is_collision(self, agent1_loc, agent2_loc, agent1_action, agent2_action):
        """Returns whether agents are colliding. Collisions happens if agent
        collide amongst themselves or with world objects.

        Returns a 2-element list: execute[i] is False when agent i's action
        must be cancelled.
        """
        # Tracks whether agents can execute their action.
        execute = [True, True]

        # Collision between agents and world objects.
        agent1_next_loc = tuple(np.asarray(agent1_loc) + np.asarray(agent1_action))
        if self.world.get_gridsquare_at(location=agent1_next_loc).collidable:
            # Revert back because agent collided.
            agent1_next_loc = agent1_loc

        agent2_next_loc = tuple(np.asarray(agent2_loc) + np.asarray(agent2_action))
        if self.world.get_gridsquare_at(location=agent2_next_loc).collidable:
            # Revert back because agent collided.
            agent2_next_loc = agent2_loc

        # Inter-agent collision.
        if agent1_next_loc == agent2_next_loc:
            # If one agent is standing still (blocked), the other yields.
            if agent1_next_loc == agent1_loc and agent1_action != (0, 0):
                execute[1] = False
            elif agent2_next_loc == agent2_loc and agent2_action != (0, 0):
                execute[0] = False
            else:
                execute[0] = False
                execute[1] = False

        # Prevent agents from swapping places.
        elif ((agent1_loc == agent2_next_loc)
                and (agent2_loc == agent1_next_loc)):
            execute[0] = False
            execute[1] = False
        return execute

    def check_collisions(self):
        """Checks for collisions and corrects agents' executable actions.
        Collisions can either happen amongst agents or between agents and
        world objects."""
        execute = [True for _ in self.sim_agents]

        # Check each pairwise collision between agents.
        for i, j in combinations(range(len(self.sim_agents)), 2):
            agent_i, agent_j = self.sim_agents[i], self.sim_agents[j]
            exec_ = self.is_collision(
                agent1_loc=agent_i.location,
                agent2_loc=agent_j.location,
                agent1_action=agent_i.action,
                agent2_action=agent_j.action)

            # Update exec array and set path to do nothing.
            if not exec_[0]:
                execute[i] = False
            if not exec_[1]:
                execute[j] = False

            # Track collisions.
            if not all(exec_):
                collision = CollisionRepr(
                    time=self.t,
                    agent_names=[agent_i.name, agent_j.name],
                    agent_locations=[agent_i.location, agent_j.location])
                self.collisions.append(collision)

        print('\nexecute array is:', execute)

        # Update agents' actions if collision was detected.
        for i, agent in enumerate(self.sim_agents):
            if not execute[i]:
                agent.action = (0, 0)
            print("{} has action {}".format(color(agent.name, agent.color), agent.action))

    def execute_navigation(self):
        # Apply each agent's action to the world and log it.
        for agent in self.sim_agents:
            interact(agent=agent, world=self.world)
            self.agent_actions[agent.name] = agent.action

    def cache_distances(self):
        """Saving distances between world objects."""
        counter_grid_names = [name for name in self.world.objects
            if "Supply" in name or "Counter" in name or "Delivery" in name or "Cut" in name]

        # Getting all source objects.
        source_objs = copy.copy(self.world.objects["Floor"])
        for name in counter_grid_names:
            source_objs += copy.copy(self.world.objects[name])

        # Getting all destination objects.
        dest_objs = source_objs

        # From every source (Counter and Floor objects),
        # calculate distance to other nodes.
        for source in source_objs:
            self.distances[source.location] = {}
            # Source to source distance is 0.
            self.distances[source.location][source.location] = 0
            for destination in dest_objs:
                # Possible edges to approach source and destination.
                # Collidable squares are approached from adjacent cells.
                source_edges = [(0, 0)] if not source.collidable else World.NAV_ACTIONS
                destination_edges = [(0, 0)] if not destination.collidable else World.NAV_ACTIONS

                # Maintain shortest distance.
                shortest_dist = np.inf
                for source_edge, dest_edge in product(source_edges, destination_edges):
                    # NOTE(review): bare except silently treats any error as
                    # "unreachable"; presumably it targets
                    # networkx.NetworkXNoPath / NodeNotFound — confirm.
                    try:
                        dist = nx.shortest_path_length(self.world.reachability_graph, (source.location,source_edge), (destination.location, dest_edge))
                        # Update shortest distance.
                        if dist < shortest_dist:
                            shortest_dist = dist
                    except:
                        continue

                # Cache distance floor -> counter.
                self.distances[source.location][destination.location] = shortest_dist

        # Save all distances under world as well.
        self.world.distances = self.distances
class RealAgent:
    """Real Agent object that performs task inference and plans.

    Each RealAgent mirrors one simulation agent in the environment: every
    step it reads its sim-agent's state from the observation, maintains a
    belief over who should do which subtask (Bayesian Delegation), and runs
    a navigation planner to pick a concrete grid action.
    """

    def __init__(self, arglist, name, id_color, recipes):
        # arglist: parsed command-line arguments (provides beta, alpha, tau,
        # cap, main_cap, max_num_subtasks, ...).
        # name: agent identifier string; id_color: display color; recipes:
        # recipe objects this agent must help complete.
        self.arglist = arglist
        self.name = name
        self.color = id_color
        self.recipes = recipes

        # Bayesian Delegation.
        self.reset_subtasks()
        self.new_subtask = None
        self.new_subtask_agent_names = []
        self.incomplete_subtasks = []
        self.signal_reset_delegator = False
        # Placeholder completion predicate; replaced per-subtask by
        # def_subtask_completion().
        self.is_subtask_complete = lambda w: False
        self.beta = arglist.beta
        # Probability mass assigned to the (0, 0) no-op when acting randomly.
        self.none_action_prob = 0.5

        self.model_type = agent_settings(arglist, name)
        if self.model_type == "up":
            # "up" = uniform priors over subtask allocations.
            self.priors = 'uniform'
        else:
            self.priors = 'spatial'

        # Navigation planner.
        self.planner = E2E_BRTDP(
                alpha=arglist.alpha,
                tau=arglist.tau,
                cap=arglist.cap,
                main_cap=arglist.main_cap)

    def __str__(self):
        # Render as the last character of the name, colorized.
        return color(self.name[-1], self.color)

    def __copy__(self):
        # NOTE(review): constructs `Agent`, not `RealAgent` — presumably an
        # alias imported elsewhere in this file; confirm. Also the four
        # attribute assignments below are immediately clobbered by the
        # `__dict__.copy()` that follows, so they appear to be dead code —
        # verify the intended copy semantics before relying on this.
        a = Agent(arglist=self.arglist,
                  name=self.name,
                  id_color=self.color,
                  recipes=self.recipes)
        a.subtask = self.subtask
        a.new_subtask = self.new_subtask
        a.subtask_agent_names = self.subtask_agent_names
        a.new_subtask_agent_names = self.new_subtask_agent_names
        a.__dict__ = self.__dict__.copy()
        if self.holding is not None:
            a.holding = copy.copy(self.holding)
        return a

    def get_holding(self):
        # Human-readable name of the held object, or the string 'None'.
        if self.holding is None:
            return 'None'
        return self.holding.full_name

    def select_action(self, obs):
        """Return best next action for this agent given observations."""
        print("Running select_action")
        # Locate this agent's simulation counterpart in the observation and
        # sync local state from it.
        sim_agent = list(filter(lambda x: x.name == self.name, obs.sim_agents))[0]
        self.location = sim_agent.location
        self.holding = sim_agent.holding
        self.action = sim_agent.action
        if obs.t == 0:
            # First timestep: derive subtasks from recipes and build the delegator.
            self.setup_subtasks(env=obs)

        # Select subtask based on Bayesian Delegation.
        self.update_subtasks(env=obs)
        self.new_subtask, self.new_subtask_agent_names = self.delegator.select_subtask(
                agent_name=self.name)
        self.plan(copy.copy(obs))
        return self.action

    def get_subtasks(self, world):
        """Return different subtask permutations for recipes."""
        print("Running get_subtasks")
        self.sw = STRIPSWorld(world, self.recipes)
        # [path for recipe 1, path for recipe 2, ...] where each path is a list of actions.
        subtasks = self.sw.get_subtasks(
            max_path_length=self.arglist.max_num_subtasks)
        # Flatten: one combined list of subtasks across all recipe paths.
        all_subtasks = [subtask for path in subtasks for subtask in path]
        # Uncomment below to view graph for recipe path i
        # i = 0
        # pg = recipe_utils.make_predicate_graph(self.sw.initial, recipe_paths[i])
        # ag = recipe_utils.make_action_graph(self.sw.initial, recipe_paths[i])
        return all_subtasks

    def setup_subtasks(self, env):
        """Initializing subtasks and subtask allocator, Bayesian Delegation."""
        print("Running setup_subtasks")
        self.incomplete_subtasks = self.get_subtasks(world=env.world)
        self.delegator = BayesianDelegator(
                agent_name=self.name,
                all_agent_names=env.get_agent_names(),
                model_type=self.model_type,
                planner=self.planner,
                none_action_prob=self.none_action_prob)

    def reset_subtasks(self):
        """Reset subtasks---relevant for Bayesian Delegation."""
        print("Running reset_subtasks")
        self.subtask = None
        self.subtask_agent_names = []
        self.subtask_complete = False

    def refresh_subtasks(self, world):
        """Refresh subtasks---relevant for Bayesian Delegation.

        Checks whether the current subtask is complete in `world` and, if
        so, removes it from the incomplete-subtask list.
        """
        print("Running refresh_subtasks")
        # Check whether subtask is complete.
        self.subtask_complete = False
        if self.subtask is None or len(self.subtask_agent_names) == 0:
            print("{} has no subtask".format(color(self.name, self.color)))
            return
        self.subtask_complete = self.is_subtask_complete(world)
        print(
            "{} done with {} according to planner: {}\nplanner has subtask {} with subtask object {}"
            .format(color(self.name, self.color), self.subtask,
                    self.is_subtask_complete(world), self.planner.subtask,
                    self.planner.goal_obj))

        # Refresh for incomplete subtasks.
        if self.subtask_complete:
            if self.subtask in self.incomplete_subtasks:
                self.incomplete_subtasks.remove(self.subtask)
                # NOTE(review): redundant — subtask_complete is already True here.
                self.subtask_complete = True
        print('{} incomplete subtasks:'.format(color(self.name, self.color)),
              ', '.join(str(t) for t in self.incomplete_subtasks))

    def update_subtasks(self, env):
        """Update incomplete subtasks---relevant for Bayesian Delegation.

        Either resets and re-seeds delegation priors (when the tracked
        subtask is gone or the delegator requests a reset) or performs a
        Bayesian update from the previous observation/actions.
        """
        print("Running update_subtasks")
        if ((self.subtask is not None and self.subtask not in
             self.incomplete_subtasks) or (self.delegator.should_reset_priors(
                 obs=copy.copy(env),
                 incomplete_subtasks=self.incomplete_subtasks))):
            self.reset_subtasks()
            self.delegator.set_priors(
                    obs=copy.copy(env),
                    incomplete_subtasks=self.incomplete_subtasks,
                    priors_type=self.priors)
        else:
            if self.subtask is None:
                # No subtask yet: (re)initialize priors instead of updating.
                self.delegator.set_priors(
                        obs=copy.copy(env),
                        incomplete_subtasks=self.incomplete_subtasks,
                        priors_type=self.priors)
            else:
                # Standard Bayesian update from last step's obs and joint actions.
                self.delegator.bayes_update(
                        obs_tm1=copy.copy(env.obs_tm1),
                        actions_tm1=env.agent_actions,
                        beta=self.beta)

    def all_done(self):
        """Return whether this agent is all done.

        An agent is done if all Deliver subtasks are completed."""
        if any([isinstance(t, Deliver) for t in self.incomplete_subtasks]):
            return False
        return True

    def get_action_location(self):
        """Return location if agent takes its action---relevant for navigation planner."""
        return tuple(np.asarray(self.location) + np.asarray(self.action))

    def plan(self, env, initializing_priors=False):
        """Plan next action---relevant for navigation planner.

        With no subtask assigned, samples a random single-step action
        (no-op weighted by none_action_prob); otherwise delegates to the
        BRTDP planner, possibly simulating other agents' planners for
        level-1 reasoning.
        """
        print(
            'right before planning, {} had old subtask {}, new subtask {}, subtask complete {}'
            .format(self.name, self.subtask, self.new_subtask,
                    self.subtask_complete))

        # Check whether this subtask is done.
        if self.new_subtask is not None:
            self.def_subtask_completion(env=env)

        # If subtask is None, then do nothing.
        if (self.new_subtask is None) or (not self.new_subtask_agent_names):
            actions = nav_utils.get_single_actions(env=env, agent=self)
            probs = []
            for a in actions:
                if a == (0, 0):
                    probs.append(self.none_action_prob)
                else:
                    # Remaining probability mass split uniformly over real moves.
                    probs.append(
                        (1.0 - self.none_action_prob) / (len(actions) - 1))
            self.action = actions[np.random.choice(len(actions), p=probs)]
        # Otherwise, plan accordingly.
        else:
            if self.model_type == 'greedy' or initializing_priors:
                # Greedy agents (and prior initialization) plan without
                # modeling other agents.
                other_agent_planners = {}
            else:
                # Determine other agent planners for level 1 planning.
                # Other agent planners are based on your planner---agents never
                # share planners.
                backup_subtask = self.new_subtask if self.new_subtask is not None else self.subtask
                other_agent_planners = self.delegator.get_other_agent_planners(
                        obs=copy.copy(env), backup_subtask=backup_subtask)

            print("[ {} Planning ] Task: {}, Task Agents: {}".format(
                self.name, self.new_subtask, self.new_subtask_agent_names))

            action = self.planner.get_next_action(
                    env=env,
                    subtask=self.new_subtask,
                    subtask_agent_names=self.new_subtask_agent_names,
                    other_agent_planners=other_agent_planners)

            # If joint subtask, pick your part of the simulated joint plan.
            # NOTE(review): when this agent is NOT among the subtask agents
            # of a joint plan, index 0 is taken — presumably a deliberate
            # fallback; confirm against the planner's action ordering.
            if self.name not in self.new_subtask_agent_names and self.planner.is_joint:
                self.action = action[0]
            else:
                self.action = action[self.new_subtask_agent_names.index(
                    self.name)] if self.planner.is_joint else action

        # Update subtask.
        self.subtask = self.new_subtask
        self.subtask_agent_names = self.new_subtask_agent_names
        self.new_subtask = None
        self.new_subtask_agent_names = []

        print('{} proposed action: {}\n'.format(self.name, self.action))

    def def_subtask_completion(self, env):
        """Install the completion predicate for the current new_subtask.

        Sets self.is_subtask_complete to a lambda over a future world `w`;
        the lambda closes over counts taken from the CURRENT env, so it must
        be (re)defined whenever the subtask changes.
        """
        # Determine desired objects.
        self.start_obj, self.goal_obj = nav_utils.get_subtask_obj(
            subtask=self.new_subtask)
        self.subtask_action_object = nav_utils.get_subtask_action_obj(
            subtask=self.new_subtask)

        # Define termination conditions for agent subtask.
        # For Deliver subtask, desired object should be at a Deliver location.
        if isinstance(self.new_subtask, Deliver):
            # Count goal objects already at delivery locations now; the
            # subtask completes when that count increases.
            self.cur_obj_count = len(
                list(
                    filter(
                        lambda o: o in set(
                            env.world.get_all_object_locs(
                                self.subtask_action_object)),
                        env.world.get_object_locs(obj=self.goal_obj,
                                                  is_held=False))))
            self.has_more_obj = lambda x: int(x) > self.cur_obj_count
            self.is_subtask_complete = lambda w: self.has_more_obj(
                len(
                    list(
                        filter(
                            lambda o: o in set(
                                env.world.get_all_object_locs(
                                    obj=self.subtask_action_object)),
                            w.get_object_locs(obj=self.goal_obj, is_held=False)
                        ))))
        # Otherwise, for other subtasks, check based on # of objects.
        else:
            # Current count of desired objects.
            self.cur_obj_count = len(
                env.world.get_all_object_locs(obj=self.goal_obj))
            # Goal state is reached when the number of desired objects has increased.
            self.is_subtask_complete = lambda w: len(
                w.get_all_object_locs(obj=self.goal_obj)) > self.cur_obj_count