def _run_trial(env, policy, reward, episode_length):
    """Run one greedy evaluation episode in *env* and return the reward sum.

    At each step the policy's best action is applied, the transition is
    printed and logged, and the step reward is accumulated. Rewards are
    only accumulated on non-terminal steps (the original code skipped the
    final step's reward); the loop ends early when the reward signals
    completion, printing the final state with the -1 action sentinel.
    """
    trial_reward = 0.0
    for _ in range(episode_length):
        s0 = env.state
        a = policy.get_best_action(s0)
        env.apply_action(a)
        s1 = env.state
        print_state(s0, a)
        step_reward, finished = reward(s0, a, s1)
        if not finished:
            trial_reward += step_reward
        logging.debug("(%s, %s, %s) -> %s", s0, a, s1, step_reward)
        if finished:
            # -1 is used as a "no action" sentinel for the final printout.
            print_state(s1, -1)
            break
    return trial_reward


def evaluate_second_agent(env, policy1, reward1, env2, policy2, reward2):
    """Evaluate two agents acting sequentially in related kitchen environments.

    Agent 1 runs a full evaluation episode in *env* starting from a fixed
    location. Persistent facts left behind by agent 1 ("open" and "up")
    are carried over into the initial state that agent 2 starts from in
    *env2*, after which agent 2 runs its own evaluation episode.

    Returns:
        (trial_reward, trial_reward2): the accumulated (undiscounted) step
        rewards for agent 1 and agent 2 respectively.
    """
    print("Acting agent enters")
    init = [1, 1]
    initial_state = KitchenState(init[0], init[1], set(), init[0], init[1])
    env.reset(initial_state)
    reward1.reset()
    policy1.reset(evaluation=True)
    trial_reward = _run_trial(env, policy1, reward1, TEST_EPISODE_LENGTH)

    # Carry agent 1's persistent world changes into agent 2's start state.
    # The constants 1 and 4 are presumably the fact indices corresponding
    # to OBJECTS4["open"] and OBJECTS4["up"] — TODO confirm against the
    # KitchenState / OBJECTS4 definitions.
    facts = set()
    if env.state.facts[OBJECTS4["open"]]:
        facts.add(1)
    if env.state.facts[OBJECTS4["up"]]:
        facts.add(4)

    print("Agent 2 enters")
    env2.reset(KitchenState(init[0], init[1], facts, init[0], init[1]))
    # NOTE(review): unlike agent 1, reward2 and policy2 are NOT reset here
    # before the episode — confirm this asymmetry is intentional.
    trial_reward2 = _run_trial(env2, policy2, reward2, TEST_EPISODE_LENGTH)
    return trial_reward, trial_reward2
def create_init(init_locations):
    """Build one initial KitchenState per (x, y) pair in *init_locations*.

    Each state starts at its location with an empty fact set, and the
    location is used for both coordinate pairs of the constructor.

    Returns:
        list of KitchenState, in the same order as *init_locations*.
    """
    # Consistency fix: pass set() for the facts argument, matching how
    # KitchenState is constructed everywhere else in this module (the
    # original passed an empty tuple here).
    return [KitchenState(x, y, set(), x, y) for x, y in init_locations]