Example #1
def markovDecision(layout, circle):
    # Build the Snakes and Ladders environment and a uniformly random agent.
    env = SnakesAndLadder(layout, circle)
    agent = RandomAgent(env.action_space)

    n_episodes = 50

    for episode in range(n_episodes):
        state = env.reset()
        done = False
        while not done:
            # Sample an action, step the environment, and let the agent
            # observe the resulting transition.
            action = agent.select_action(state)
            next_state, reward, done = env.step(action)

            agent.update(state, action, reward, next_state)

            state = next_state
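
SnakesAndLadder and RandomAgent are project classes that are not shown in this example. As a rough orientation only, a RandomAgent compatible with the calls above could be sketched as follows; the select_action/update signatures are inferred from the usage, and env.action_space is assumed to be a plain sequence of actions rather than the project's actual type:

import random

class RandomAgent:
    """Picks actions uniformly at random; the no-op update keeps the
    training loop generic so a learning agent can be dropped in later."""

    def __init__(self, action_space):
        self.action_space = list(action_space)

    def select_action(self, state):
        # The state is ignored: the policy is uniform over all actions.
        return random.choice(self.action_space)

    def update(self, state, action, reward, next_state):
        pass  # a random policy does not learn from transitions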
Example #2
possible_actions = [0, 1]  # 0 = Cooperate, 1 = Defect
# Two biased random agents: one cooperates 90% of the time, the other 10%.
cooperator = RandomAgent(possible_actions, p=0.9)
defector = RandomAgent(possible_actions, p=0.1)

# Stateless interactions (the agents have no memory).
s = None

n_iter = 1000
for i in range(n_iter):

    # Run one full episode.
    env.reset()
    done = False

    while not done:

        # Agents decide.
        a0 = cooperator.act()
        a1 = defector.act()

        # The world changes.
        new_s, (r0, r1), done, _ = env.step(([a0], [a1]))

        # Each agent learns from its own perspective on the joint outcome.
        cooperator.update(s, (a0, a1), (r0, r1), new_s)
        defector.update(s, (a1, a0), (r1, r0), new_s)

        s = new_s
        print(r0, r1)
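
The env used here is created outside the snippet, and its class is not shown. Under the assumption that it is an iterated prisoner's dilemma with the standard payoff matrix, a hypothetical environment matching the step signature above (actions arrive wrapped in one-element lists, rewards come back as a pair) might look like this; the class name, round count, and payoffs are illustrative, not the project's actual definitions:

# Classic prisoner's dilemma payoffs: (my action, opponent action) -> my reward.
PAYOFF = {(0, 0): 3, (0, 1): 0, (1, 0): 5, (1, 1): 1}

class IteratedPrisonersDilemmaEnv:
    """Two-player matrix game repeated for a fixed number of rounds."""

    def __init__(self, rounds=10):
        self.rounds = rounds
        self.t = 0

    def reset(self):
        self.t = 0
        return None  # stateless game: there is no observation

    def step(self, joint_action):
        (a0,), (a1,) = joint_action  # unwrap the one-element action lists
        r0, r1 = PAYOFF[(a0, a1)], PAYOFF[(a1, a0)]
        self.t += 1
        done = self.t >= self.rounds
        return None, (r0, r1), done, {}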