def _s_to_obs(self, s):
    """Encode the state `s` as a float32 observation grid.

    Six position layers are passed to `get_grid_representation`, in order:

    0. the agent's position (doubled below when the agent carries a
       battery, so the cell reads 2 instead of 1)
    1. the train's position while `s.train_life > 0` (multiplied below by
       the remaining life)
    2. the train's position when `s.train_life == 0`
    3. batteries that are still present (not yet collected)
    4. batteries that are no longer present (collected)
    5. the goals (`self.feature_locations`)

    The indexing below treats the last axis as the layer axis; the original
    documentation gives the shape as (height, width, 6).
    NOTE(review): the exact shape is produced by `get_grid_representation`,
    which is not visible here -- confirm.
    """
    # Split the battery positions into "still there" and "already taken"
    # in one pass; dict order is preserved within each list.
    remaining_batteries = []
    collected_batteries = []
    for position, is_present in s.battery_present.items():
        if is_present:
            remaining_batteries.append(position)
        else:
            collected_batteries.append(position)

    # Exactly one of the two train layers is populated for life >= 0;
    # both stay empty should the life ever be negative.
    live_train_layer = [s.train_pos] if s.train_life > 0 else []
    dead_train_layer = [s.train_pos] if s.train_life == 0 else []

    grid = get_grid_representation(
        self.width,
        self.height,
        [
            [s.agent_pos],
            live_train_layer,
            dead_train_layer,
            remaining_batteries,
            collected_batteries,
            self.feature_locations,
        ],
    )

    if s.carrying_battery:
        grid[:, :, 0] *= 2
    grid[:, :, 1] *= s.train_life

    return np.array(grid, dtype=np.float32)
def _s_to_obs(self, s):
    """Encode the state `s` as a float32 observation grid.

    `s.agent_pos` unpacks into (orientation, x, y). Six position layers are
    passed to `get_grid_representation`, in order:

    0. the agent's position, scaled below by `orientation + 1`
       (presumably so orientation 0 is still distinguishable from an
       empty cell)
    1. the agent's position iff it is carrying an apple, otherwise empty
    2. trees that currently have apples
    3. trees that currently have no apples
    4. all bucket locations
    5. all bucket locations, then overwritten below with the number of
       apples recorded for each bucket in `s.bucket_states`

    The bucket loop indexes the grid as [y, x, layer]; the original
    documentation gives the shape as (height, width, 6).
    NOTE(review): the exact shape is produced by `get_grid_representation`,
    which is not visible here -- confirm.
    """
    orientation, agent_x, agent_y = s.agent_pos
    agent_cell = (agent_x, agent_y)

    # Partition the trees by whether they currently hold apples.
    fruitful_trees = []
    bare_trees = []
    for position, has_apple in s.tree_states.items():
        if has_apple:
            fruitful_trees.append(position)
        else:
            bare_trees.append(position)

    grid = get_grid_representation(
        self.width,
        self.height,
        [
            [agent_cell],
            [agent_cell] if s.carrying_apple else [],
            fruitful_trees,
            bare_trees,
            self.bucket_locations,
            self.bucket_locations,
        ],
    )

    grid[:, :, 0] *= orientation + 1

    # Replace the bucket marker in the last layer with the apple count.
    for (column, row), apple_count in s.bucket_states.items():
        grid[row, column, 5] = apple_count

    return np.array(grid, dtype=np.float32)
def _s_to_obs(self, s):
    """Encode the state `s` as a float32 observation grid.

    Five position layers are passed to `get_grid_representation`, in order:

    0. the agent's position
    1. intact vases
    2. broken vases
    3. carpets (`self.carpet_locations`)
    4. goals (`self.feature_locations`)

    No layer is rescaled afterwards, so the result holds only what
    `get_grid_representation` writes for each listed position.
    NOTE(review): the original documentation states the shape as
    (5, height, width); the actual axis order is decided by
    `get_grid_representation`, which is not visible here -- confirm.
    """
    # Partition the vases by their intact/broken flag in a single pass.
    intact_vases = []
    broken_vases = []
    for position, is_intact in s.vase_states.items():
        if is_intact:
            intact_vases.append(position)
        else:
            broken_vases.append(position)

    grid = get_grid_representation(
        self.width,
        self.height,
        [
            [s.agent_pos],
            intact_vases,
            broken_vases,
            self.carpet_locations,
            self.feature_locations,
        ],
    )
    return np.array(grid, dtype=np.float32)
def _s_to_obs(self, s):
    """Encode the state `s` as a float32 observation grid.

    The state itself is used as the only position in a single layer handed
    to `get_grid_representation`.
    NOTE(review): this presumes `s` is a grid position in whatever form
    `get_grid_representation` expects -- confirm against that helper.
    """
    only_layer = [s]
    grid = get_grid_representation(self.width, self.height, [only_layer])
    return np.array(grid, dtype=np.float32)