Example no. 1
    def action(self, board):
        env = GameEnv().update(board)
        key = self.counter_key(env)

        # Run up to thinking_loop search passes; stop early once the sampled
        # action agrees with the greedy (max-Q) action, or during the early
        # (tau = 1) turns where one pass is enough.
        for tl in range(self.play_config.thinking_loop):
            self.search_moves(board)

            policy = self.calc_policy(board)
            action = int(np.random.choice(range(self.labels_n), p=policy))

            # Greedy choice by value: add a large constant to Q for every move
            # with at least one visit, so unvisited moves are never picked.
            action_by_value = int(
                np.argmax(self.var_q[key] + (self.var_n[key] > 0) * 100))
            if action == action_by_value or env.turn < self.play_config.change_tau_turn:
                break

        # This is for play_gui; it is not needed during training.
        self.thinking_history[env.observation] = HistoryItem(
            action, policy, list(self.var_q[key]), list(self.var_n[key]))

        self.moves.append([env.observation, list(policy)])
        return action
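
The np.random.choice call samples a move in proportion to the search policy, while np.argmax picks the single best move. A minimal, self-contained sketch of the two selection modes, with an invented three-move policy vector:

import numpy as np

policy = np.array([0.1, 0.6, 0.3])  # invented search policy over three moves
sampled = int(np.random.choice(range(len(policy)), p=policy))  # exploratory pick
greedy = int(np.argmax(policy))                                # deterministic pick
print(sampled, greedy)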
Example no. 2
    def play_game(self, best_model, ng_model):
        env = GameEnv().reset()

        best_player = GamePlayer(self.config,
                                 best_model,
                                 play_config=self.config.eval.play_config)
        ng_player = GamePlayer(self.config,
                               ng_model,
                               play_config=self.config.eval.play_config)
        best_is_white = random() < 0.5
        if not best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        env.reset()
        while not env.done:
            if env.player_turn() == Player.black:
                action = black.action(env.board)
            else:
                action = white.action(env.board)
            env.step(action)

        ng_win = None  # stays None on a draw
        if env.winner == Winner.white:
            if best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_white
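
A hypothetical caller could run play_game repeatedly and promote ng_model once its win rate clears a threshold. The loop below is an assumed sketch, not code from the repository; the evaluate name, game_count, and the 0.55 threshold are invented:

def evaluate(worker, best_model, ng_model, game_count=50, threshold=0.55):
    # worker is assumed to expose the play_game method shown above.
    wins, decided = 0, 0
    for _ in range(game_count):
        ng_win, _best_is_white = worker.play_game(best_model, ng_model)
        if ng_win is not None:  # None signals a draw
            decided += 1
            wins += ng_win
    return decided > 0 and wins / decided >= threshold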
Example no. 3
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    game_model = PlayWithHuman(config)

    while True:
        env = GameEnv().reset()
        human_is_black = random() < 0.5
        game_model.start_game(human_is_black)

        while not env.done:
            # The human plays whichever color was drawn; the AI takes the other.
            human_turn = (env.player_turn() == Player.black) == human_is_black
            if human_turn:
                action = game_model.move_by_human(env)
                print("You move to: " + str(action))
            else:
                action = game_model.move_by_ai(env)
                print("AI moves to: " + str(action))
            env.step(action)
            env.render()

        print("\nEnd of the game.")
        print("Game result:")
        if env.winner == Winner.white:
            print("X wins")
        elif env.winner == Winner.black:
            print("O wins")
        else:
            print("Game was a draw")
Example no. 4
    async def start_search_my_move(self, board):
        self.running_simulation_num += 1
        async with self.sem:  # the semaphore caps the number of parallel searches
            env = GameEnv().update(board)
            leaf_v = await self.search_my_move(env, is_root_node=True)
            self.running_simulation_num -= 1
            return leaf_v
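
The semaphore is what bounds how many search coroutines run concurrently. A minimal, self-contained sketch of the same pattern with plain asyncio (the coroutine names and the limit of 2 are illustrative):

import asyncio

async def simulate(sem, i):
    async with sem:               # at most N coroutines pass this point at once
        await asyncio.sleep(0.1)  # stand-in for the real search_my_move call
        return i

async def main():
    sem = asyncio.Semaphore(2)    # illustrative limit of 2 parallel searches
    print(await asyncio.gather(*(simulate(sem, i) for i in range(5))))

asyncio.run(main())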
Example no. 5
    def calc_policy(self, board):
        # Calculate π(a|s0) from the MCTS visit counts.
        pc = self.play_config
        env = GameEnv().update(board)
        key = self.counter_key(env)
        if env.turn < pc.change_tau_turn:
            # tau = 1: play proportionally to visit counts
            return self.var_n[key] / np.sum(self.var_n[key])
        else:
            # tau = 0: always play the most-visited move
            action = np.argmax(self.var_n[key])
            ret = np.zeros(self.labels_n)
            ret[action] = 1
            return ret
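
For concreteness, with made-up visit counts of [4, 1, 5]: before change_tau_turn the method returns the proportional policy [0.4, 0.1, 0.5], and afterwards the one-hot vector [0, 0, 1]. A quick numpy check:

import numpy as np

var_n = np.array([4, 1, 5])     # invented visit counts
print(var_n / np.sum(var_n))    # tau = 1 -> [0.4 0.1 0.5]
one_hot = np.zeros(3)
one_hot[np.argmax(var_n)] = 1
print(one_hot)                  # tau = 0 -> [0. 0. 1.]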
Example no. 6
    def __init__(self, config: Config):
        self.config = config
        self.model = None  # type: GameModel
        self.loaded_filenames = set()
        self.loaded_data = {}
        self.dataset = None
        self.optimizer = None
        self.dbx = None
        self.version = 0  # TODO: look this up dynamically from the Dropbox files
        self.env = GameEnv()
        self.raw_timestamp = None
        self.best_is_white = True
        self.play_files_per_generation = 15  # each file holds 25 games, so each generation adds 375 games
        self.max_play_files = 300

        # In steady state there are always 7500 games (300 files x 25 games)
        # to learn from, spanning the previous 20 generations.
        self.min_play_files_to_learn = 0
        self.play_files_on_dropbox = 0
Example no. 7
    def convert_to_training_data(data):
        state_list = []
        policy_list = []
        z_list = []
        for state, policy, z in data:
            board = list(state)
            board = np.reshape(board, (3, 3))
            env = GameEnv().update(board)

            black_ary, white_ary = env.black_and_white_plane()
            # Order the planes so the current player's plane always comes first.
            state = [
                black_ary, white_ary
            ] if env.player_turn() == Player.black else [white_ary, black_ary]

            state_list.append(state)
            policy_list.append(policy)
            z_list.append(z)

        return np.array(state_list), np.array(policy_list), np.array(z_list)
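
Each sample becomes a pair of 3x3 planes plus the stored policy and outcome, so n positions yield arrays of shape (n, 2, 3, 3), (n, 9), and (n,), assuming a 3x3 board with 9 move labels. The call below is a hypothetical illustration; the board string, uniform policy, and z value are invented:

import numpy as np

# One sample: a 9-character board state, a 9-way policy, and the outcome z.
data = [("XO" + " " * 7, np.full(9, 1 / 9), 1)]
states, policies, zs = convert_to_training_data(data)
print(states.shape, policies.shape, zs.shape)  # expected: (1, 2, 3, 3) (1, 9) (1,)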
Example no. 8
    def play_game(self, best_model, ng_model):
        env = GameEnv().reset()

        if (self.raw_timestamp != self.dbx.files_get_metadata(
                '/model/model_best_weight.h5').client_modified):
            print('A newer model version is available - giving up this match')
            ng_win = 0
            self.best_is_white = True
            return ng_win, self.best_is_white

        best_player = GamePlayer(self.config,
                                 best_model,
                                 play_config=self.config.eval.play_config)
        ng_player = GamePlayer(self.config,
                               ng_model,
                               play_config=self.config.eval.play_config)
        self.best_is_white = not self.best_is_white  # alternate colors every game
        if not self.best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        env.reset()
        while not env.done:
            if env.player_turn() == Player.black:
                action = black.action(env.board)
            else:
                action = white.action(env.board)
            env.step(action)

        ng_win = None  # stays None on a draw
        if env.winner == Winner.white:
            if self.best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if self.best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, self.best_is_white
Example no. 9
def start(config: Config):
    tf_util.set_session_config(per_process_gpu_memory_fraction=0.2)
    return SelfPlayWorker(config, env=GameEnv()).start()
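
set_session_config here caps how much GPU memory the worker claims, so several self-play processes can share one card. A minimal sketch of what such a helper typically does, assuming TensorFlow 1.x with the Keras backend (the repository's tf_util may differ):

import tensorflow as tf
from keras import backend as K

def set_session_config(per_process_gpu_memory_fraction=0.2, allow_growth=True):
    # Build a session whose GPU allocator is limited to a fraction of memory.
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=per_process_gpu_memory_fraction,
        allow_growth=allow_growth))
    K.set_session(tf.Session(config=config))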