コード例 #1
0
ファイル: qlagent.py プロジェクト: williampark73/snake
def train(num_trials=40):
    """Play training games and feed the observed transitions to the learner.

    Every trial creates a fresh ``SnakeGame`` on a 20x25 board.  Within a
    trial the two sides alternate: ``minimax_agent_first_index`` moves first,
    then an action is chosen with ``get_QL_Action`` from the moves allowed by
    ``get_valid``, and the resulting transition is handed to
    ``incorporateFeedback``.  After each trial the module-level
    ``explorationProb`` is halved.

    The curses screen is always shut down with ``curses.endwin()`` when the
    function exits, including when a trial raises.

    Args:
        num_trials: Number of games to play.
    """
    global explorationProb

    try:
        for _ in range(num_trials):
            game = SnakeGame(board_size=(20, 25))
            state = game.start_state()
            game.print_board(state)

            # Successor produced by the learner's most recent move in THIS
            # game.  Reset per trial so a game that ends before the learner
            # has moved neither hits an unbound name nor reuses a successor
            # left over from the previous game.
            succ = None

            while True:
                # --- opponent's move ---
                action = minimax_agent_first_index(game, state)
                state = game.successor(state, action, True)

                end_info = game.is_end(state)
                if end_info[0]:
                    if succ is not None:
                        reward = end_info[2] - state[3][1]
                        # NOTE(review): the state/action passed here are the
                        # post-opponent-move state and the opponent's action,
                        # while `succ` is the learner's previous successor.
                        # Kept as in the original; confirm this is the
                        # transition incorporateFeedback is meant to receive.
                        incorporateFeedback(game, state, action, reward, succ)
                    break
                game.print_board(state)

                # --- learner's move ---
                actions = get_valid(state[2][1], game.actions())
                action = get_QL_Action(game, state, actions)
                succ = game.successor(state, action)

                # Reward is the change in state[3][1] across the learner's
                # move (the value shown below as ScorePrev / ScoreNow).
                reward = succ[3][1] - state[3][1]

                state[0].addstr(28, 10, ' Reward: ' + str(reward) + '     ')
                state[0].addstr(29, 10, ' ScoreNow: ' + str(succ[3][1]) + '     ')
                state[0].addstr(30, 10, ' ScorePrev: ' + str(state[3][1]) + '    ')

                incorporateFeedback(game, state, action, reward, succ)

                game.print_board(state)
                state = succ

                if game.is_end(state)[0]:
                    break

            # Explore half as often in the next trial.
            explorationProb = explorationProb / 2
    finally:
        # Restore the terminal even if a trial failed part-way through.
        curses.endwin()
    '''
コード例 #2
0
ファイル: main.py プロジェクト: williampark73/snake
def play_snake_game(agent_one, agent_two, *, num_games=100, board_size=(20, 40)):
    """Play a series of games between two agents and print the win tally.

    Each game is played on a fresh ``SnakeGame``.  The agents alternate
    moves, ``agent_one`` first, until ``game.is_end(state)[0]`` is truthy.
    The curses screen is shut down with ``curses.endwin()`` after every game,
    including when the game raises.

    The outcome is read from element 1 of the final ``is_end`` result:
    ``0`` prints "Tie game", ``1`` is counted for player 2, and any other
    value is counted for player 1.

    Args:
        agent_one: Callable ``agent(game, state)`` returning the action for
            the side that moves first.
        agent_two: Callable ``agent(game, state)`` returning the action for
            the side that moves second.
        num_games: Number of games to play (default 100).
        board_size: ``board_size`` argument passed to ``SnakeGame``
            (default ``(20, 40)``).
    """
    player1 = 0
    player2 = 0

    for _ in range(num_games):
        try:
            game = SnakeGame(board_size=board_size)
            state = game.start_state()
            game.print_board(state)

            # `mover` acts this turn; the two names swap after every
            # non-terminal move so both agents share one code path.
            mover, waiter = agent_one, agent_two
            while True:
                action = mover(game, state)
                state = game.successor(state, action, True)

                result = game.is_end(state)
                if result[0]:
                    break
                game.print_board(state)
                mover, waiter = waiter, mover
        finally:
            # Leave curses mode before printing, and also on failure so the
            # terminal is not left in an unusable state.
            curses.endwin()

        if result[1] == 0:
            print("Tie game")
        elif result[1] == 1:
            player2 += 1
        else:
            player1 += 1
        # Per-agent score reporting (result[2] / result[3], ordered by
        # state[5]) was disabled in the original and is intentionally omitted.

    print("Player 1 wins: " + str(player1))
    print("Player 2 wins: " + str(player2))