Example #1
    def test_add_state(self):

        state1 = [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0.5]]
        state2 = [[1, 0, 0, 0, 0, 0, 0, 0, 0], [0.75]]
        state3 = [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0.5]]
        state4 = [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0.8]]
        state5 = [[0, 0, 0, 0, 0, -1, 0, 0, 0], [0.5]]

        agent = Agent(1)
        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)
        agent.add_state(state5)

        self.assertEqual(len(agent.states), 5)
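
The Agent class under test is not reproduced on this page. As a reference point for the examples that follow, here is a minimal sketch inferred only from the assertions; the constructor arguments beyond the mark mirror the Agent(1, LEARNING_RATE, AGENT_X_RANDOM_TURNS) call in Example #8, and all names and defaults are assumptions rather than the original implementation. Note that add_state appends unconditionally: state1 and state3 above are identical, yet the test expects five entries.

class Agent:
    def __init__(self, mark, learning_rate=0.1, random_turns=0.1):
        # mark is 1 for X or -1 for O; learning_rate and random_turns are
        # assumed defaults so the plain Agent(1) calls in the tests still work.
        self.mark = mark
        self.learning_rate = learning_rate
        self.random_turns = random_turns
        self.states = []            # each entry is [board, [value]]
        self.pre_state = None       # state chosen on the previous turn
        self.current_state = None   # state chosen on the current turn

    def add_state(self, state):
        # Append without deduplication, as Example #1 expects.
        self.states.append(state)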
Example #2
    def test_turn(self):

        agent = Agent(1)

        board = [1, 0, 0, 1, 0, -1, 0, -1, 0]

        state1 = [[1, 1, 0, 1, 0, -1, 0, -1, 0], [0.5]]
        state2 = [[1, 1, 0, 1, 0, -1, 0, -1, 0], [0.75]]
        state3 = [[1, 1, 0, 1, 0, -1, 0, -1, 0], [0.95]]
        state4 = [[1, 1, 0, 1, 0, -1, 0, -1, 0], [0.8]]

        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)

        self.assertEqual(agent.turn(board), [1, 1, 0, 1, 0, -1, 0, -1, 0])
Example #3
    def test_backprop_state_value(self):

        state1 = [[1, 1, 0, 1, 1, -1, -1, -1, 0], [0.5]]
        state2 = [[1, 1, 0, 1, 1, -1, -1, -1, 0], [0.7]]

        agent = Agent(1)
        agent.add_state(state1)
        agent.add_state(state2)

        board = [1, 1, 0, 1, 0, -1, -1, -1, 0]

        agent.turn(board)
        self.assertEqual(agent.pre_state[1], [0.7])
        self.assertEqual(agent.current_state[1], [0.7])
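
Examples #2 and #3 exercise turn() and the value backup together. The exact update rule is not visible from these two tests alone; continuing the Agent sketch from Example #1, one hedged reading that satisfies both assertions is: pick the greedy successor (sketched after Example #6), remember it as current_state while keeping the previous choice in pre_state, nudge the previous state's value toward the chosen one, and write the move onto the board in place. Exploration via random_turns is deliberately left out of this sketch.

    def reset_both_states(self):
        # Clear the per-game bookkeeping between games (used by train() in Example #8).
        self.pre_state = None
        self.current_state = None

    def turn(self, board):
        choice = self.greedy(board)
        if not choice[0]:
            # greedy() returned the [[], [0]] sentinel: nothing stored is reachable.
            return board
        self.pre_state = self.current_state if self.current_state else choice
        self.current_state = choice
        # Assumed temporal-difference style backup of the previous state's value
        # toward the value of the state just chosen.
        self.pre_state[1][0] += self.learning_rate * (choice[1][0] - self.pre_state[1][0])
        board[:] = choice[0]  # apply the move in place so the caller sees it
        return board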
Example #4
    def test_is_state_in_states(self):

        agent = Agent(1)

        state1 = [[1, 0, 0, 1, 0, -1, 0, 0, 0], [0.5]]
        state2 = [[1, 0, 0, 0, 0, 0, 0, 0, 0], [0.75]]
        state3 = [[1, 0, 0, 1, 0, -1, 0, 0, 0], [0.5]]
        state4 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.8]]

        state5 = [[1, -1, 0, 0, 0, -1, 0, 1, 0], [0.8]]

        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)

        self.assertEqual(agent.is_state_in_states(state3), True)
        self.assertEqual(agent.is_state_in_states(state5), False)
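
is_state_in_states checks whether a state is already stored; the test cannot distinguish board-only from full-state equality (state3 duplicates state1 entirely, state5 matches nothing), so this sketch, continuing the Agent class above, compares only the board layout:

    def is_state_in_states(self, state):
        # True when some stored state has the same board layout (state[0]).
        return any(stored[0] == state[0] for stored in self.states)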
Example #5
    def test_update_state(self):

        agent = Agent(1)

        state1 = [[1, 0, 0, 1, 0, -1, 0, 0, 0], [0.5]]
        state2 = [[1, 0, 0, 0, 0, 0, 0, 0, 0], [0.75]]
        state3 = [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0.5]]
        state4 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.8]]

        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)

        new_state_2 = [[1, 0, 0, 0, 0, 0, 0, 0, 0], [0.75]]
        new_state_4 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.15]]

        agent.update_state(new_state_2)
        agent.update_state(new_state_4)

        self.assertEqual(agent.states[1], new_state_2)
        self.assertEqual(agent.states[3], new_state_4)
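
update_state replaces the stored entry whose board matches the new state while keeping its position in the list (the assertions check agent.states[1] and agent.states[3]). A sketch under that assumption:

    def update_state(self, state):
        # Overwrite the first stored state with the same board layout.
        for i, stored in enumerate(self.states):
            if stored[0] == state[0]:
                self.states[i] = state
                return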
Example #6
    def test_greedy(self):

        agent = Agent(1)

        board = [1, 0, 0, 0, 0, -1, 0, 0, 0]

        state1 = [[1, 0, 1, 0, 0, -1, 0, 0, 0], [0.5]]
        state2 = [[1, 0, 0, 0, 1, -1, 0, 0, 0], [0.95]]
        state3 = [[1, 1, 0, 0, 0, -1, 0, 0, 0], [0.5]]
        state4 = [[1, 0, 0, 0, 0, -1, 0, 0, 1], [0.8]]

        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)

        self.assertEqual(agent.greedy(board),
                         [[1, 0, 0, 0, 1, -1, 0, 0, 0], [0.95]])

        agent = Agent(-1)

        board = [1, 0, 0, 0, 0, -1, 0, 1, 0]

        state1 = [[1, 0, 0, 0, 0, -1, 0, 1, -1], [0.5]]
        state2 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.95]]
        state3 = [[1, 0, 0, -1, 0, -1, 0, 1, 0], [0.75]]
        state4 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.8]]

        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)

        self.assertEqual(agent.greedy(board),
                         [[1, 0, 0, -1, 0, -1, 0, 1, 0], [0.75]])

        agent = Agent(-1)

        board = [1, 1, 0, 1, 0, -1, 0, -1, 0]

        state1 = [[1, 1, 1, 1, 0, -1, 0, -1, -1], [0.5]]
        state2 = [[1, 1, 0, 1, 0, -1, 1, -1, 0], [0.75]]

        agent.add_state(state1)
        agent.add_state(state2)

        self.assertEqual(agent.greedy(board), [[], [0]])
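
All three greedy cases are consistent with picking the highest-valued reachable state (for either mark) and returning an empty sentinel when nothing stored is reachable. A sketch that relies on next_states, which Example #7 exercises below:

    def greedy(self, board):
        # Best known successor of `board`, or [[], [0]] when none is stored.
        candidates = self.next_states(board)
        if not candidates:
            return [[], [0]]
        return max(candidates, key=lambda state: state[1][0])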
Example #7
    def test_next_states(self):

        board = [1, 0, 0, 0, 0, -1, 0, 0, 0]

        state1 = [[1, 0, 0, 1, 0, -1, 0, 0, 0], [0.5]]
        state2 = [[1, 0, 0, 0, 0, 0, 0, 0, 0], [0.75]]
        state3 = [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0.5]]
        state4 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.8]]

        agent = Agent(1)
        agent.add_state(state1)
        agent.add_state(state2)
        agent.add_state(state3)
        agent.add_state(state4)

        self.assertEqual(agent.next_states(board),
                         [[[1, 0, 0, 1, 0, -1, 0, 0, 0], [0.5]],
                          [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.8]]])

        board = [1, 0, 0, 1, 0, -1, 0, -1, 0]

        state5 = [[1, 0, 0, 1, 0, -1, 0, 0, 0], [0.5]]
        state6 = [[1, 0, 0, 0, 0, 0, 0, 0, 0], [0.75]]
        state7 = [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0.5]]
        state8 = [[1, 0, 0, 0, 0, -1, 0, 1, 0], [0.8]]
        state9 = [[1, 0, 1, 1, 0, -1, 0, -1, 0], [0.8]]
        state10 = [[1, 0, -1, 1, 0, -1, 0, -1, 0], [0.8]]
        state11 = [[1, 0, 0, 1, 0, -1, 0, -1, 1], [0.9]]

        agent.add_state(state5)
        agent.add_state(state6)
        agent.add_state(state7)
        agent.add_state(state8)
        agent.add_state(state9)
        agent.add_state(state10)
        agent.add_state(state11)

        self.assertEqual(agent.next_states(board),
                         [[[1, 0, 1, 1, 0, -1, 0, -1, 0], [0.8]],
                          [[1, 0, 0, 1, 0, -1, 0, -1, 1], [0.9]]])

        agent = Agent(-1)

        board = [1, 1, 0, 1, 0, -1, 0, -1, 0]

        state1 = [[1, 1, 0, 1, 0, -1, 0, -1, -1], [0.5]]
        state2 = [[1, 1, 0, 1, 0, -1, 0, -1, 0], [0.75]]

        agent.add_state(state1)
        agent.add_state(state2)

        self.assertEqual(agent.next_states(board),
                         [[[1, 1, 0, 1, 0, -1, 0, -1, -1], [0.5]]])

        agent = Agent(-1)

        board = [1, 1, 0, 1, 0, -1, 0, -1, 0]

        state1 = [[1, 1, 1, 1, 0, -1, 0, -1, -1], [0.5]]
        state2 = [[1, 1, 0, 1, 0, -1, 1, -1, 0], [0.75]]

        agent.add_state(state1)
        agent.add_state(state2)

        self.assertEqual(agent.next_states(board), [])
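
The four next_states cases all fit one rule: a stored state qualifies when its board equals the current board except for exactly one previously empty cell that now holds this agent's own mark. A sketch of that filter, continuing the Agent class:

    def next_states(self, board):
        # Stored states reachable from `board` by a single move of self.mark.
        results = []
        for state in self.states:
            diffs = [(old, new) for old, new in zip(board, state[0]) if old != new]
            if diffs == [(0, self.mark)]:
                results.append(state)
        return results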
Example #8
def train():
    stopwatch = Stopwatch()
    stopwatch.start()

    board = create_empty_board()
    agent_x = Agent(1, LEARNING_RATE, AGENT_X_RANDOM_TURNS)
    agent_o = Agent(-1, LEARNING_RATE, AGENT_O_RANDOM_TURNS)
    for i in range(TRAINING_TURNS):
        # X moves first; if that move ends the game, start a fresh board,
        # let X take the opening move on it, and clear both agents' per-game state.
        agent_x.turn(board)
        if is_game_over(board):
            board = create_empty_board()
            agent_x.turn(board)
            agent_x.reset_both_states()
            agent_o.reset_both_states()
        # O answers; if the game ends here, reset the board and both agents.
        agent_o.turn(board)
        if is_game_over(board):
            board = create_empty_board()
            agent_x.reset_both_states()
            agent_o.reset_both_states()
        # Report progress every 100 iterations.
        if (i % 100) == 0:
            print('Iteration= {} of {}.'.format(i, TRAINING_TURNS))
            print('Training time= {}'.format(stopwatch.elapsed()))
    return agent_x.states
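
train() returns the state-value table accumulated by the X agent; what the surrounding project does with it is not shown in these examples. As one illustration only, the table could be persisted for later play (the file name and the use of pickle are assumptions made for this example):

if __name__ == '__main__':
    import pickle

    learned_states = train()
    # Save the learned state values so a later game can load and reuse them.
    with open('agent_x_states.pkl', 'wb') as f:
        pickle.dump(learned_states, f)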