예제 #1
0
def main():
    """Load the trained Atari-style Go model and let it play four moves.

    Starts from the fixed capture puzzle produced by init_game(1, 6) and
    prints the board after every move the network chooses.
    """
    model = keras.models.load_model('src/learn/RL_Atari/test_model_1.h5')
    game = init_game(Game(), 1, 6)
    print('new game')
    print(game)
    for _ in range(4):
        q_values = model.predict(board2input(game, 'b'), batch_size=1)
        masked_q = copy.copy(q_values)
        move = Move.from_flat_idx(np.argmax(q_values))
        location = move.to_matrix_location()
        # If the best-scoring point is occupied, mask it with a very low
        # value so argmax falls through to the next-best action.
        while game.board[location] != EMPTY:
            masked_q[0][np.argmax(masked_q)] = -100
            move = Move.from_flat_idx(np.argmax(masked_q))
            location = move.to_matrix_location()
        game.play(move, 'b')
        print(game)
예제 #2
0
def play(args):
    """Play one 9x9 game between two players and return the final result.

    *args* is a (player1, player2, verbose) tuple. Player 1 opens as
    black; the game ends when both players pass consecutively.
    """
    mover, waiter, verbose = args
    mover_col, waiter_col = 'b', 'w'
    previous_move = None
    game = Game({'SZ': 9})

    while True:
        if verbose:
            print(game)

        move = mover.genmove(mover_col, game)
        result = game.play(move, player=mover_col)

        # Two consecutive passes end the game.
        if previous_move is not None and previous_move.is_pass and move.is_pass:
            if verbose:
                print(mover_col, mover)
                print(waiter_col, waiter)
            return result

        # Hand the turn to the other player.
        mover, waiter = waiter, mover
        mover_col, waiter_col = waiter_col, mover_col
        previous_move = move
예제 #3
0
def replay_game(sgf_line, func):
    """Recreate a game from an SGF string and collect per-move features.

    More of a proof-of-concept or example than really a necessary function.
    We will use some modified version of this to create the training data.

    Args:
        sgf_line: the raw SGF content of a single game.
        func: callable(game, color, black_win) evaluated after every move;
            its results are stacked into the returned array.

    Returns:
        np.ndarray stacking one entry per move, or None when the 'RE'
        (result) property names no winner.
    """
    collection = sgf.parse(sgf_line)

    # This all only works if the SGF contains only one game.
    game_tree = collection.children[0]
    game_properties = game_tree.nodes[0].properties

    # Original repeated the same dict lookup three times and used the
    # redundant `True if ... else False` form.
    result = game_properties['RE'][0]
    if not result.startswith(('B', 'W')):
        return None
    black_win = result.startswith('B')

    game = Game(game_properties)
    out = []
    for node in game_tree.nodes[1:]:
        player_color = list(node.properties.keys())[0]
        move = Move.from_sgf(str(node.properties[player_color][0]))
        # checking=False: trust the record, skip legality validation.
        game.play(move, player_color.lower(), checking=False)
        out.append(func(game, player_color.lower(), black_win))
    return np.stack(out)
예제 #4
0
 def __init__(self, player1type, player2type, logging_level,
              end_of_turn_sleep_time):
     """Initialise the controller thread and both GTP engine players.

     Args:
         player1type: engine type string sent to the black player.
         player2type: engine type string sent to the white player.
         logging_level: level used for the controller and player loggers.
         end_of_turn_sleep_time: seconds to pause after each turn.
     """
     threading.Thread.__init__(self)
     self.logger = Utils.get_unique_file_logger(self, logging_level)
     self.end_of_turn_sleep_time = end_of_turn_sleep_time
     self.game = Game()
     self.view = None
     # Black ('b') and white ('w') players; each engine calls back into
     # this controller.
     self.player1 = Player('b', logging_level)
     self.player1.engine.controller = self
     self.player2 = Player('w', logging_level)
     self.player2.engine.controller = self
     # Reverse lookup: engine -> owning player.
     self.map = {
         self.player1.engine: self.player1,
         self.player2.engine: self.player2,
     }
     self.send_to_player(self.player1, 'set_player_type ' + player1type)
     self.send_to_player(self.player2, 'set_player_type ' + player2type)
     # GTP responses start with "= "; [2:] strips that prefix off the name.
     self.player1.name = self.wait_for_response(self.player1, 'name')[2:]
     self.player2.name = self.wait_for_response(self.player2, 'name')[2:]
     self.current_player = self.player1
     self.other_player = self.player2
예제 #5
0
            sgf_files.append(path)

# Replay every collected SGF file and append its final board to a CSV file
# named after the game id.
for sgf_path in sgf_files:
    with open(sgf_path, 'r') as handle:
        raw = handle.read()
        try:
            collection = sgf.parse(raw)
        except Exception:
            print('Failed to parse ' + sgf_path + ' as sgf-collection')
            continue

    # Assume the sgf file contains one game.
    game_tree = collection.children[0]
    root = game_tree.nodes[0]
    # root.properties contains the initial game setup.
    game_id = root.properties['GN'][0]
    out_file = os.path.join(root_dir, game_id + '.csv')
    if os.path.isfile(out_file):
        os.remove(out_file)

    # very similar to play_from_sgf.py, unify these parts TODO
    board_size = int(root.properties['SZ'][0])
    game = Game(root.properties, show_each_turn=True)
    for node in game_tree.nodes[1:]:
        colour = list(node.properties.keys())[0]
        move = str2move(str(node.properties[colour][0]), board_size)
        game.play(move, colour.lower())

    game.board2file(out_file, 'a')
예제 #6
0
def main():
    """Train a dense Q-network to capture a fixed stone at (1, 6).

    Plays `epochs` self-play episodes with epsilon-greedy exploration and
    fits the network after every move using the standard Q-learning target
    reward + gamma * max_a' Q(s', a').
    """
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform',
                    activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))

    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    col_coord, row_coord = 1, 6  #random.randint(0, 8), random.randint(0, 8)
    epochs = 10
    gamma = 0.9  # discount factor for future rewards
    epsilon = 1  # exploration rate, annealed towards 0.1 below
    for i in range(epochs):

        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        # game in progress
        while (status == 1):
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if random.random() < epsilon:
                # Explore: pick a random legal, non-pass move.
                valid_moves = game.get_playable_locations('b')
                move = random.choice(valid_moves)
                while move.is_pass:
                    move = random.choice(valid_moves)
                new_game = copy.deepcopy(game)
                new_game.play(move, 'b')
                move = move.to_flat_idx()
            else:
                # Exploit: highest Q-value over empty points.
                temp_qval = copy.copy(qval)
                move = Move.from_flat_idx(np.argmax(temp_qval))
                new_game = copy.deepcopy(game)
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # Mask occupied points with an arbitrarily low value so
                    # argmax falls through to the next-best action.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = Move.from_flat_idx(np.argmax(temp_qval))
                    location = move.to_matrix_location()
                new_game.play(move, 'b')
                move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 10
                status = 0
            else:
                reward = -1

            # Bellman target: max Q over the *successor* state. The original
            # predicted on the pre-move state and indexed by the chosen
            # action, which is not the Q-learning target.
            newQ = model.predict(board2input(new_game, 'b'), batch_size=1)
            maxQ = np.max(newQ)
            if reward == -1:  # non-terminal state
                update = reward + (gamma * maxQ)
            else:  # terminal state: no future reward to discount
                update = reward
            # y equals the predicted Q-values except at the taken action,
            # which is pushed towards the target.
            y = np.zeros((1, 81))
            y[:] = qval[:]
            y[0][move] = update
            model.fit(board2input(game, 'b'),
                      y,
                      batch_size=1,
                      epochs=1,  # `nb_epoch` is removed in modern Keras
                      verbose=0)
            game = copy.copy(new_game)
        print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
            #print ('epsilon : ' + str(epsilon))

    model.save('test_model_1.h5')
예제 #7
0
def main():
    """Train a dense Q-network with experience replay on random capture goals.

    Each episode places a target stone at a random point; the network is
    trained from minibatches sampled out of a circular replay buffer with
    the Q-learning target reward + gamma * max_a' Q(s', a').
    """
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform',
                    activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))

    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    epochs = 50000
    gamma = 0.975  # discount factor for future rewards
    epsilon = 1  # exploration rate, annealed towards 0.1
    batchSize = 50
    buffer = 100  # replay buffer capacity (circular overwrite)
    replay = []
    h = 0  # next buffer slot to overwrite once full
    for i in range(epochs):
        col_coord, row_coord = random.randint(0, 8), random.randint(0, 8)
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        reward = -1  # by default at game start
        # game in progress
        while (status == 1):
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if reward == -1:
                if random.random() < epsilon:
                    # Explore: pick a random legal, non-pass move.
                    valid_moves = game.get_playable_locations(BLACK)
                    move = random.choice(valid_moves)
                    while move.is_pass:
                        move = random.choice(valid_moves)
                        if len(valid_moves) == 0:
                            print('end it')
                    new_game = copy.deepcopy(game)
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()
                else:
                    # Exploit: highest Q-value over empty points.
                    temp_qval = copy.copy(qval)
                    move = Move.from_flat_idx(np.argmax(temp_qval))
                    new_game = copy.deepcopy(game)
                    location = move.to_matrix_location()
                    while new_game.board[location] != EMPTY:
                        # Mask occupied points with an arbitrarily low value
                        # so argmax falls through to the next-best action.
                        temp_qval[0][np.argmax(temp_qval)] = -100
                        move = Move.from_flat_idx(np.argmax(temp_qval))
                        location = move.to_matrix_location()
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 50
            else:
                reward = -1

            # experience replay storage: fill the buffer first, then
            # overwrite the oldest slot (circular index h).
            if len(replay) < buffer:
                replay.append((board2input(game, 'b'), move, reward,
                               board2input(new_game, 'b')))
            else:
                if (h < (buffer - 1)):
                    h += 1
                else:
                    h = 0
                replay[h] = (board2input(game, 'b'), move, reward,
                             board2input(new_game, 'b'))
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    (m_game, m_move, m_reward, m_new_game) = memory
                    oldqval = model.predict(m_game, batch_size=1)
                    # Bellman target: max Q over the *successor* state. The
                    # original read Q(s, a) of the old state here, which is
                    # not the Q-learning target.
                    newqval = model.predict(m_new_game, batch_size=1)
                    maxq = np.max(newqval)
                    y = np.zeros(81)
                    y[:] = oldqval
                    if m_reward == 50:
                        update = m_reward  # terminal: no future reward
                    else:
                        update = m_reward + gamma * maxq
                    y[m_move] = update
                    X_train.append(m_game)
                    y_train.append(y)
                X_train = np.stack(X_train)
                y_train = np.stack(y_train)
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=0)
            game = copy.copy(new_game)
            if reward == 50:
                status = 0
        print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
        # Checkpoint every 5000 episodes.
        if i % 5000 == 0 and i > 0:
            name = 'src/learn/RL_Atari/hard_atari_' + str(i) + '.h5'
            model.save(name)

    model.save('src/learn/RL_Atari/test_model_final.h5')
예제 #8
0
class GTPcontroller(threading.Thread):
    """Referee thread that relays GTP commands between two engine players."""

    def __init__(self, player1type, player2type, logging_level,
                 end_of_turn_sleep_time):
        """Set up both players, wire their engines back to this controller,
        and query their names via GTP."""
        threading.Thread.__init__(self)
        self.logger = Utils.get_unique_file_logger(self, logging_level)
        self.end_of_turn_sleep_time = end_of_turn_sleep_time
        self.game = Game()
        self.view = None
        self.player1 = Player('b', logging_level)
        self.player1.engine.controller = self
        self.player2 = Player('w', logging_level)
        self.player2.engine.controller = self
        # Reverse lookup: engine -> owning player.
        self.map = {
            self.player1.engine: self.player1,
            self.player2.engine: self.player2,
        }
        self.send_to_player(self.player1, 'set_player_type ' + player1type)
        self.send_to_player(self.player2, 'set_player_type ' + player2type)
        # GTP responses start with "= "; [2:] strips that prefix.
        self.player1.name = self.wait_for_response(self.player1, 'name')[2:]
        self.player2.name = self.wait_for_response(self.player2, 'name')[2:]
        self.current_player = self.player1
        self.other_player = self.player2

    def log_and_print(self, message):
        """Log *message* and echo it to stdout."""
        self.logger.info(message)
        print(message)

    def send_to_player(self, player, command):
        """Forward a GTP *command* to one player's engine."""
        self.log_and_print('      send to ' + player.name + ' (' +
                           player.color + '): ' + command)
        player.engine.handle_input_from_controller(command)

    def broadcast(self, command):
        """Send *command* to both players.

        Bug fix: the original ignored *command* and always sent 'quit'.
        The only visible caller passes 'quit', so behavior there is
        unchanged.
        """
        self.send_to_player(self.player1, command)
        self.send_to_player(self.player2, command)

    def wait_for_response(self, player, message):
        """Send *message* and block until the engine responds."""
        self.send_to_player(player, message)
        # NOTE(review): busy-wait burns a core; a condition variable would
        # be kinder, but sleeping here could change engine timing.
        while player.latest_response is None:
            pass
        return player.get_latest_response()

    def run(self):
        """Main referee loop: alternate genmove/play until the game ends."""
        self.game.start()
        while self.game.is_running:
            print('\nnext turn\n')
            response = self.wait_for_response(
                self.current_player, 'genmove ' + self.current_player.color)
            if response.startswith('?'):
                # "?" is the GTP error prefix.
                self.log_and_print(
                    'player ' + self.current_player.name +
                    ' responded with an error, aborting the game: ' + '"' +
                    response[2:] + '"')
                break
            move = response[2:]  # strip away the "= "
            # Tell the opponent what was played before updating our board.
            self.send_to_player(
                self.other_player,
                'play ' + self.current_player.color + ' ' + move)

            self.game.play(Move().from_gtp(move, self.game.size),
                           self.current_player.color)
            print('\n' + self.game.__str__())

            time.sleep(self.end_of_turn_sleep_time)

            # swap players for next turn
            if self.current_player == self.player1:
                self.current_player = self.player2
                self.other_player = self.player1
            else:
                self.current_player = self.player1
                self.other_player = self.player2

        self.broadcast('quit')
        print('\n' + self.game.__str__())
        # if self.view is not None:
        #     self.view.game_ended()
        # else:
        print('Final result:', self.game.evaluate_points())
        sys.exit(0)

    def handle_input_from_engine(self, engine, input):
        """Record an engine's response so wait_for_response() can return."""
        input = input.strip()
        player = self.map[engine]
        self.log_and_print('received from ' + player.name + ' (' +
                           player.color + '): ' + input)
        player.latest_response = input

    def receive_move_from_gui(self, move):
        """Accept a move clicked in the GUI when the current player is human;
        testing=True only validates, it does not mutate the game."""
        human = self.current_player.engine.bot
        if type(human) is HumanGui:
            try:
                self.game.play(move, self.current_player.color, testing=True)
                human.move = move
            except InvalidMove_Error as e:
                print('\ninvalid move')