コード例 #1
0
ファイル: kitchen_salt.py プロジェクト: praal/align
def _run_greedy_episode(env, policy, reward, total):
    """Roll ``policy`` out in ``env`` and accumulate the per-step reward.

    Runs at most ``TEST_EPISODE_LENGTH`` steps. On every step the policy's
    best action for the current state is applied, the transition is printed
    and scored by ``reward``. The step that ``reward`` reports as finished is
    NOT added to the total; it only ends the episode.

    Args:
        env: Environment exposing ``state`` and ``apply_action(action)``.
        policy: Object exposing ``get_best_action(state)``.
        reward: Callable ``(s0, a, s1) -> (step_reward, finished)``.
        total: Starting value of the accumulator; the caller chooses it so
            the numeric type of the result is under the caller's control.

    Returns:
        ``total`` plus the rewards of all non-terminal steps.
    """
    for _ in range(TEST_EPISODE_LENGTH):
        before = env.state
        action = policy.get_best_action(before)
        env.apply_action(action)
        after = env.state
        print_state(before, action)
        step_reward, finished = reward(before, action, after)
        if not finished:
            total += step_reward
        logging.debug("(%s, %s, %s) -> %s", before, action, after, step_reward)
        if finished:
            # Show the terminal state; -1 is passed in place of an action.
            print_state(after, -1)
            break
    return total


def evaluate_second_agent(env, policy1, reward1, env2, policy2, reward2):
    """Evaluate two agents one after the other and return both returns.

    The first agent acts in ``env`` starting from position (1, 1) with no
    facts set. Selected facts of the state it leaves behind are then carried
    over into the start state of ``env2``, where the second agent acts.

    Args:
        env: Environment for the first (acting) agent.
        policy1: Policy of the first agent; reset in evaluation mode here.
        reward1: Reward function of the first agent; reset here.
        env2: Environment for the second agent.
        policy2: Policy of the second agent.
        reward2: Reward function of the second agent.

    Returns:
        Tuple ``(trial_reward, trial_reward2)`` with the accumulated reward
        of the first and the second agent respectively.
    """
    print("Acting agent enters")
    init = [1, 1]
    start_state = KitchenState(init[0], init[1], set(), init[0], init[1])
    env.reset(start_state)
    reward1.reset()
    policy1.reset(evaluation=True)
    # Accumulator starts as a float for agent 1 (as in the original code).
    trial_reward = _run_greedy_episode(env, policy1, reward1, 0.0)

    # Carry over what the first agent left behind.
    # NOTE(review): 1 and 4 are presumably the fact ids matching
    # OBJECTS4["open"] / OBJECTS4["up"] -- confirm against OBJECTS4.
    facts = set()
    final_facts = env.state.facts
    if final_facts[OBJECTS4["open"]]:
        facts.add(1)
    if final_facts[OBJECTS4["up"]]:
        facts.add(4)

    print("Agent 2 enters")
    env2.reset(KitchenState(init[0], init[1], facts, init[0], init[1]))

    # NOTE(review): unlike policy1/reward1, policy2 and reward2 are not reset
    # before this rollout -- confirm that this is intentional.
    # Accumulator starts as an int for agent 2 (as in the original code).
    trial_reward2 = _run_greedy_episode(env2, policy2, reward2, 0)
    return trial_reward, trial_reward2
コード例 #2
0
ファイル: kitchen_salt.py プロジェクト: praal/align
def create_init(init_locations):
    """Build one ``KitchenState`` per entry of ``init_locations``.

    Each entry must be indexable; its elements ``[0]`` and ``[1]`` are passed
    as the first two and again as the last two constructor arguments, with an
    empty tuple as the third argument.

    Returns:
        A list of the created states, in the order of ``init_locations``.
    """
    return [
        KitchenState(loc[0], loc[1], (), loc[0], loc[1])
        for loc in init_locations
    ]