def play(total_score, total_test_case):
    thread_total_score = 0
    thread_test_case = NUM_OF_TEST_CASE // NUM_OF_THREAD
    for i in range(thread_test_case):
        game = Game(show=False)
        for j in range(ROUND_PER_EPISODE):
            ## pick an action from the strategy
            if j % 3 == 0:
                gameboard = GameBoard(game.gameboard.board)
                num_available_choices = len(gameboard.get_available_choices())
                init_state = State(gameboard.board, 0, [], num_available_choices)
                root_node = Node(state=init_state)
                current_node = root_node

            current_node = monte_carlo_tree_search(current_node)
            x, y = current_node.state.get_choice()

            ##########################
            game.input_pos(x, y)
        thread_total_score += game.gameboard.score - ROUND_PER_EPISODE * PENALTY_PER_STEP

    with lock:
        total_test_case.value += thread_test_case
        total_score.value += thread_total_score
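
The worker above accumulates into shared counters under a module-level lock; the .value attributes suggest multiprocessing.Value, though a threading-based driver would look much the same. A minimal driver sketch, reusing the play, NUM_OF_THREAD, and lock names from the snippet; the Value counters and everything else here are assumptions, not code from the source:

import multiprocessing as mp

# Hypothetical driver for the worker above: each process runs play() and folds
# its results into the shared counters. lock lives at module level so forked
# workers inherit it.
lock = mp.Lock()

if __name__ == "__main__":
    total_score = mp.Value("d", 0.0)    # shared accumulated score
    total_test_case = mp.Value("i", 0)  # shared count of finished games

    workers = [
        mp.Process(target=play, args=(total_score, total_test_case))
        for _ in range(NUM_OF_THREAD)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    print("average score:", total_score.value / max(total_test_case.value, 1))
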
Example #2
def policy_play(agent):
    game = Game(show=False)
    while not game.termination():
        board = game.gameboard.board
        choice = agent.best_move(board)
        game.input_pos(choice[0], choice[1])
    return game.gameboard.score
Example #3
def td_learning(mode):
    global epsilon
    max_score = 0
    if mode != "new":
        start_episode = int(mode)
        state_dict = torch.load(network_path + "net" + mode + ".pth")
        net.load_state_dict(state_dict)
        target_net.load_state_dict(state_dict)
        load_train_data(mode)
        epsilon = epsilon - epsilon_decay * start_episode
        print("load the network and train data " + mode)

    else:
        start_episode = 0
    for episode in range(start_episode, nEpisode):
        ## init state
        game = Game(show=False)
        round = 0
        while not game.termination():
            ## pick an action
            possible_actions = game.gameboard.get_available_choices()
            ## choice is a flattened action index
            current_state = game.gameboard.board
            choice = greedy_policy(current_state, episode, possible_actions, net)
            choice2d = deflatten_action(choice)
            next_state, reward = game.input_pos(choice2d[0], choice2d[1])
            ## simulation
            last_state, total_reward = game_simulation(next_state)
            total_reward += reward

            if epsilon > final_epsilon and episode > observations_steps:
                epsilon -= epsilon_decay

            replay_memory.append((current_state, choice, total_reward, last_state))

            if episode > observations_steps:
                if round == 0:
                    batch_learning()
                if episode % target_model_period == 0 and game.gameboard.round_index == 1:
                    target_net.load_state_dict(net.state_dict())
                    print("Update the target net")


            if game.gameboard.score > max_score and episode > observations_steps:
                if game.gameboard.round_index == 1 and episode == observations_steps + 1:
                    print("Finish observations")
                max_score = game.gameboard.score
                print("max score is %d in episode %d" % (max_score, episode))

            ## note: batch_size // batch_size is always 1, so round stays at 0
            ## and batch_learning() runs on every step after the observation phase
            round = (round + 1) % (batch_size // batch_size)

        if episode % save_model_period == 0:
            print("save model in episode %d" % (episode))
            save_net(net, episode)
            save_train_data(episode)

    print("save model in episode %d" % (nEpisode))
    save_net(net, nEpisode)
    save_train_data(nEpisode)
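
batch_learning() and its replay-buffer sampling are not shown in these examples. A minimal sketch of a DQN-style minibatch update over replay_memory, assuming the transition tuples stored above (state, flat action index, reward, next state) and the net / target_net pair; batch_size, gamma, the optimizer, and the network's input shape are assumptions, not taken from the source:

import random
import torch
import torch.nn.functional as F

def batch_learning_sketch(net, target_net, optimizer, replay_memory,
                          batch_size=32, gamma=0.99):
    # One DQN-style update: sample a minibatch, compute bootstrapped TD targets
    # from the frozen target_net, and regress the online net's Q-value for the
    # taken action towards them.
    if len(replay_memory) < batch_size:
        return
    batch = random.sample(list(replay_memory), batch_size)
    # States may need flattening/reshaping here to match the network's input.
    states = torch.tensor([b[0] for b in batch], dtype=torch.float32)
    actions = torch.tensor([b[1] for b in batch], dtype=torch.long)
    rewards = torch.tensor([b[2] for b in batch], dtype=torch.float32)
    next_states = torch.tensor([b[3] for b in batch], dtype=torch.float32)

    q_taken = net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        next_q = target_net(next_states).max(dim=1).values
    targets = rewards + gamma * next_q

    loss = F.mse_loss(q_taken, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
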
Example #4
def mcts_play(agent):
    game = Game(show=False)
    mcts = MCTS(game.gameboard, agent)
    while not game.termination():
        current_node = mcts.get_next_node()
        choice = current_node.state.choice
        game.input_pos(choice[0], choice[1])
    return game.gameboard.score
Example #5
def play(agent, show=False):
    game = Game(show=show)
    state = game.gameboard.board
    #game.gameboard.print_board()
    while not game.termination():
        choice = agent.get_action(game.gameboard.board)
        game.input_pos(choice[0], choice[1])
    return game.gameboard.score
Example #6
def play(mode):

    game = Game(filepath)
    load_net(mode)
    state = game.gameboard.board
    while not game.termination():
        choice = get_action(state)
        choice2d = deflatten_action(choice)
        state, reward = game.input_pos(choice2d[0], choice2d[1])
Example #7
def play(agent, show=False):
    game = Game(show=show)
    state = game.gameboard.board
    #game.gameboard.print_board()
    while not game.termination():
        choice = agent.best_move(game.gameboard.board,
                                 game.gameboard.get_available_choices())
        game.input_pos(choice[0], choice[1])
    return game.gameboard.score
Example #8
def play():

    game = Game()
    state = game.gameboard.board
    #game.gameboard.print_board()
    while not game.termination():
        choice = get_action(state)
        choice2d = deflatten_action(choice)
        ### TODO: check whether the chosen action is available
        state, reward = game.input_pos(choice2d[0], choice2d[1])
    return game.gameboard.score
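
Several of these examples convert between a flat action index and board coordinates via deflatten_action / flatten_action, which are not shown. A minimal sketch assuming a square board of side BOARD_SIZE and row-major ordering (the constant and the layout are assumptions):

BOARD_SIZE = 4  # assumed board side length

def deflatten_action(flat_choice):
    # Map a flat, row-major action index back to (row, col) coordinates.
    return flat_choice // BOARD_SIZE, flat_choice % BOARD_SIZE

def flatten_action(choice):
    # Inverse mapping: (row, col) -> flat, row-major index.
    return choice[0] * BOARD_SIZE + choice[1]
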
Example #9
def get_memory(replay_memory, agent, n=10):
    for _ in range(n):
        game = Game(show=False)
        state = game.gameboard.board
        #game.gameboard.print_board()
        while not game.termination():
            board = copy.deepcopy(game.gameboard.board)
            choice = agent.best_move(game.gameboard.board,
                                     game.gameboard.get_available_choices())
            next_board, reward = game.input_pos(choice[0], choice[1])
            next_board = copy.deepcopy(next_board)
            replay_memory.append((board, choice, reward, next_board))
Example #10
def play(mode, filename, number):
    load_net(mode)
    fd = open("./output/" + filename, 'w')
    for i in range(number):
        game = Game(show=False)
        state = game.gameboard.board
        game.gameboard.print_board()
        while not game.termination():
            choice = get_action(state)
            choice2d = deflatten_action(choice)
            state, reward = game.input_pos(choice2d[0], choice2d[1])
        fd.write(str(game.gameboard.score) + "\n")
    fd.close()
Example #11
def run_episode(agent):
    train_data = []
    game = Game(show=False)
    while not game.termination():
        board = copy.deepcopy(game.gameboard.board)
        choice = agent.greedy_policy(board,
                                     game.gameboard.get_available_choices())
        _, reward = game.input_pos(choice[0], choice[1])
        train_data.append([board, reward, choice])

    ## correct the reward: the loop runs backwards, so train_data[i + 1][1]
    ## already holds the discounted return from step i + 1 and each entry
    ## becomes reward_i + GAMMA_RATE * return_{i+1}
    for i in reversed(range(len(train_data) - 1)):
        train_data[i][1] += GAMMA_RATE * train_data[i + 1][1]

    return train_data[0:-CUT]
Example #12
def run_episode(agent):
    train_data = []
    game = Game(show=False)

    mcts = MCTS(game.gameboard, agent)

    while not game.termination():
        board = copy.deepcopy(game.gameboard.board)
        current_node = mcts.get_next_node()
        if current_node is None:
            break
        choice = current_node.state.get_choice()
        _, reward = game.input_pos(choice[0], choice[1])
        train_data.append([board, reward, choice])

    return train_data
Example #13
def run_episode():
    train_data = []
    game = Game(show=False)

    current_node = init_first_node(game.gameboard)

    while not game.termination():

        current_node, pi = monte_carlo_tree_search(current_node)

        choice = current_node.state.get_choice()
        flat_choice = flatten_action(choice)
        net_index = action_index2net_index(flat_choice)
        one_data = [deepcopy(game.gameboard.board), net_index, pi, 0]

        state, reward = game.input_pos(choice[0], choice[1])
        one_data[3] = reward
        train_data.append(one_data)

    ## correct the reward
    for i in reversed(range(len(train_data) - 1)):
        train_data[i][3] += GAMMA_RATE * train_data[i + 1][3]
    return train_data
Example #14
def run_episode(agent):
    train_data = []
    game = Game(show=False)

    mcts = MCTS(game.gameboard, agent)

    while not game.termination():
        board = copy.deepcopy(game.gameboard.board)
        current_node = mcts.get_next_node()
        if current_node is None:
            break
        choice = current_node.state.get_choice()
        _, reward = game.input_pos(choice[0], choice[1])
        train_data.append([board, reward, choice])

    ## correct the reward
    for i in reversed(range(len(train_data) - 1)):
        train_data[i][1] += GAMMA_RATE * train_data[i + 1][1]


#     for i in range(len(train_data) - 2):
#         train_data[i][1] += train_data[i+1][1] + train_data[i+2][1]

    return train_data
Example #15
def play(filename, number):
    fd = open("./output/" + filename, 'w')
    for i in range(number):
        game = Game(show=False)
        state = game.gameboard.board
        game.gameboard.print_board()
        while not game.termination():
            choice = game.random_player()
            game.input_pos(choice[0], choice[1])
        fd.write(str(game.gameboard.score) + "\n")
    fd.close()
Example #16
    return best_next_node


def get_best_child(node):
    best_quality_value = 0
    best_child = None  # remains None if no child has quality_value > 0
    for child in node.child:
        if child.quality_value > best_quality_value:
            best_quality_value = child.quality_value
            best_child = child
    return best_child
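
get_best_child above picks the final move greedily by quality_value; inside the search, MCTS normally selects children with an exploration bonus (compare the best_child(node, False) call in the MCTS class further below). A standard UCB1-style sketch, assuming each node also tracks a visit_times counter (that attribute name and the constant c are assumptions, not taken from the source):

import math

def ucb_best_child(node, is_exploration, c=1.0 / math.sqrt(2)):
    # Pick the child maximising average value plus, optionally, a UCB1
    # exploration bonus; assumes node.child and per-node visit_times counters.
    best_score, best_child = float("-inf"), None
    for child in node.child:
        exploit = child.quality_value / child.visit_times
        explore = 0.0
        if is_exploration:
            explore = c * math.sqrt(2.0 * math.log(node.visit_times)
                                    / child.visit_times)
        score = exploit + explore
        if score > best_score:
            best_score, best_child = score, child
    return best_child
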


if __name__ == "__main__":
    gameplay = Game()

    num_available_choices = len(gameplay.gameboard.get_available_choices())
    init_state = State(gameplay.gameboard.board, 0, [], num_available_choices)
    root_node = Node(state=init_state)
    current_node = root_node

    gameplay.gameboard.print_board()

    for i in range(20):
        if i % MAX_ROUND_NUMBER == 0:
            num_available_choices = len(
                gameplay.gameboard.get_available_choices())
            init_state = State(gameplay.gameboard.board, 0, [],
                               num_available_choices)
            root_node = Node(state=init_state)
Example #17
def init_game():
    game = Game(show=False)
    return game
Example #18
        best_next_node = self.best_child(node, False)
        #print("quality value:", best_next_node.quality_value)
        return best_next_node

    def get_next_node(self):
        self.current_node = self.monte_carlo_tree_search(self.current_node)
        return self.current_node


if __name__ == "__main__":
    total_score = 0
    total_game = 1000
    score_list = []
    for game in range(total_game):
        gameplay = Game(show=False)
        mcts = MCTS(gameplay.gameboard)

        for i in range(15):
            current_node = mcts.get_next_node()
            choice = current_node.state.get_choice()
            #print("You have choosen : " + str(choice[0]) + " " + str(choice[1]))
            gameplay.input_pos(choice[0], choice[1])

        score_list.append(gameplay.gameboard.score)
        total_score += gameplay.gameboard.score
    ave_score = total_score / total_game
    print(ave_score)
    print(ave_score / 15)

    with open("./plot/mcts_alone.txt", "w") as fd: