Example #1
0
def train_ep_p2(net, policy, render=False):
    """Train `net` as player 1 for one episode in which player 1 moves second.

    Player 1's first turn is skipped so that the `policy`-controlled
    player 2 opens the game.  After every player-1 move, player 2 takes
    all of its consecutive moves; the transition is stored with the shaped
    reward ``reward - p2_reward`` and ``net.learn()`` is called.

    Args:
        net: agent with ``store_transition`` and ``learn`` methods,
            trained as player 1.
        policy: policy identifier forwarded to ``choose_action`` for
            player 2.
        render: when True, print each move and render the board.

    Returns:
        tuple: ``(ep_reward, win)`` — player 1's total episode reward and
        1 if ``ep_reward > 49`` (counted as a win), else 0.
    """
    state = env.reset()
    ep_reward = 0
    ctr = 0
    if render:
        env.render()
    while True:
        if ctr > 0:  # skip player 1's first turn so that he goes second
            action = choose_action('self', 1, state, net, 0.05)
            next_state, reward, done, info = env.step(action)
            if render:
                print('Player 1 moves', action)
                env.render()
        else:
            # No player-1 move yet: synthesize a no-op step so player 2
            # can open the game.
            next_state = state
            reward = 0
            done = False
            info = {'next_player': 2}

        # Let player 2 play (possibly several consecutive moves).
        p2_reward = 0
        while info['next_player'] == 2 and not done:
            action2 = choose_action(policy, 2, next_state, net, 0.05)
            next_state, reward2, done, info = env.step(action2)
            if render:
                print('Player 2 moves', action2)
                env.render()
            p2_reward += reward2

        if ctr == 0:
            # BUGFIX: advance `state` past player 2's opening move(s).
            # Previously the `continue` skipped the `state = next_state`
            # update at the bottom of the loop, so player 1's first action
            # was chosen from the stale pre-opening board.  (The evaluation
            # counterpart `test_ep_p2` already advances `state` here.)
            state = next_state
            ctr += 1
            continue

        # Shaped reward: player 2's gain counts against player 1.
        net.store_transition(state, action, reward - p2_reward, next_state)
        ep_reward += reward

        net.learn()

        if done:
            break
        ctr += 1

        state = next_state
    win = 1 if ep_reward > 49 else 0
    return ep_reward, win
Example #2
0
def test_ep_pvp(net, net2, num_test, eps=0.05, render=False):
    """Evaluate `net` (player 1) against `net2` (player 2) head-to-head.

    Plays ``num_test`` episodes with epsilon-greedy action selection on
    both sides.  Removed the dead ``ctr`` counter from the original
    (written but never read).

    Args:
        net: agent controlling player 1.
        net2: agent controlling player 2.
        num_test: number of evaluation episodes.
        eps: exploration rate passed to ``choose_action``.
        render: when True, print each move and render the board.

    Returns:
        tuple: (mean player-1 episode reward, mean player-2 episode
        reward, win rate, draw rate) where a win is ``ep_reward > 49``
        and a draw is ``ep_reward == 49``.
    """
    test_reward = []
    test_reward_p2 = []
    test_win = 0
    draw = 0

    for _ in range(num_test):
        state = env.reset()
        ep_reward = 0
        p2_reward = 0
        if render:
            env.render()
        while True:
            action = choose_action('self', 1, state, net, eps)
            next_state, reward, done, info = env.step(action)
            if render:
                print('Player 1 moves', action)
                env.render()

            # Player 2 may take several consecutive moves.
            while info['next_player'] == 2 and not done:
                action2 = choose_action('self', 2, next_state, net2, eps)
                next_state, reward2, done, info = env.step(action2)
                if render:
                    print('Player 2 moves', action2)
                    env.render()
                p2_reward += reward2

            ep_reward += reward
            if done:
                break

            state = next_state

        test_reward_p2.append(p2_reward)
        test_reward.append(ep_reward)
        if ep_reward > 49:
            test_win += 1
        if ep_reward == 49:
            draw += 1

    return np.mean(test_reward), np.mean(
        test_reward_p2), test_win / num_test, draw / num_test
Example #3
0
def test_ep_p2(net, policy, num_test, eps=0.05, render=False):
    """Evaluate `net` as player 1 going second against `policy` as player 2.

    Runs ``num_test`` episodes.  In each one, player 1's opening turn is
    skipped (a no-op step is fabricated) so that player 2 opens the game.

    Args:
        net: agent controlling player 1.
        policy: policy identifier forwarded to ``choose_action`` for
            player 2.
        num_test: number of evaluation episodes.
        eps: exploration rate passed to ``choose_action``.
        render: when True, print each move and render the board.

    Returns:
        tuple: (mean player-1 episode reward, fraction of episodes with
        reward > 49, counted as wins).
    """
    rewards = []
    wins = 0

    for _ in range(num_test):
        state = env.reset()
        episode_reward = 0
        turn = 0
        if render:
            env.render()
        while True:
            if turn == 0:
                # Skip player 1's opening turn: fabricate a no-op step
                # that hands the move straight to player 2.
                next_state, reward, done = state, 0, False
                info = {'next_player': 2}
            else:
                action = choose_action('self', 1, state, net, eps)
                next_state, reward, done, info = env.step(action)
                if render:
                    print('Player 1 moves', action)
                    env.render()

            # Player 2 may take several consecutive moves.
            p2_total = 0
            while info['next_player'] == 2 and not done:
                p2_action = choose_action(policy, 2, next_state, net, eps)
                next_state, p2_step_reward, done, info = env.step(p2_action)
                if render:
                    print('Player 2 moves', p2_action)
                    env.render()
                p2_total += p2_step_reward

            episode_reward += reward
            if done:
                break

            state = next_state
            turn += 1

        rewards.append(episode_reward)
        if episode_reward > 49:
            wins += 1

    return np.mean(rewards), wins / num_test
Example #4
0
def train_ep(net, policy, render=False):
    """Train `net` as player 1 (moving first) for one episode against `policy`.

    After each player-1 move, player 2 (driven by ``policy``) takes all of
    its consecutive moves; the transition is stored with the shaped reward
    ``reward - p2_reward`` and ``net.learn()`` is invoked.  Removed the
    dead ``ctr`` counter from the original (incremented and reset but
    never read).

    Args:
        net: agent with ``store_transition`` and ``learn`` methods,
            trained as player 1.
        policy: policy identifier forwarded to ``choose_action`` for
            player 2.
        render: when True, print each move and render the board.

    Returns:
        tuple: ``(ep_reward, win)`` — player 1's total episode reward and
        1 if ``ep_reward > 49`` (counted as a win), else 0.
    """
    state = env.reset()
    ep_reward = 0
    if render:
        env.render()
    while True:
        action = choose_action('self', 1, state, net, 0.05)
        next_state, reward, done, info = env.step(action)
        if render:
            print('Player 1 moves', action)
            env.render()

        # Let player 2 play (possibly several consecutive moves).
        p2_reward = 0
        while info['next_player'] == 2 and not done:
            action2 = choose_action(policy, 2, next_state, net, 0.05)
            next_state, reward2, done, info = env.step(action2)
            if render:
                print('Player 2 moves', action2)
                env.render()
            p2_reward += reward2

        # Shaped reward: player 2's gain counts against player 1.
        net.store_transition(state, action, reward - p2_reward, next_state)
        ep_reward += reward

        net.learn()

        if done:
            break

        state = next_state
    win = 1 if ep_reward > 49 else 0
    return ep_reward, win