Ejemplos de Move en Python, ejemplos de src.play.model.Move.Move en Python

Ejemplo n.º 1

0

Mostrar archivo

def main():
    """Load a saved Q-network and let it play four greedy black moves.

    A fresh game is seeded with one stone via ``init_game`` and printed.
    For each of the four turns the network's highest-scoring board cell is
    chosen; if that cell is occupied, its score is knocked down and the
    next-best cell is tried until an empty one is found. The board is
    printed after every move.
    """
    model = keras.models.load_model('src/learn/RL_Atari/test_model_1.h5')
    col_coord, row_coord = 1, 6
    game = init_game(Game(), col_coord, row_coord)
    print('new game')
    print(game)

    for _ in range(4):
        qval = model.predict(board2input(game, 'b'), batch_size=1)
        # Work on a copy so the original predictions stay untouched.
        scores = copy.copy(qval)
        candidate = Move.from_flat_idx(np.argmax(qval))
        cell = candidate.to_matrix_location()
        while game.board[cell] != EMPTY:
            # Arbitrary low value: demotes the current maximum so the next
            # argmax yields the runner-up.
            scores[0][np.argmax(scores)] = -100
            candidate = Move.from_flat_idx(np.argmax(scores))
            cell = candidate.to_matrix_location()
        game.play(candidate, 'b')
        print(game)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: LibertyNNBot.py Proyecto: benjaminaaron/GO_DILab

    def genmove(self, color, game) -> Move:
        """Return the playable move the network scores highest, or a pass.

        Args:
            color: Colour to move, forwarded to
                ``game.get_playable_locations`` and ``self.board_to_input``.
            game: Current game; ``game.board`` is fed to the network.

        Returns:
            The ``Move`` at the arg-max of the softmaxed scores, or a pass
            move when that cell is not one of the playable locations
            (e.g. there are no playable board moves at all).
        """
        # We're still interested in the playable locations
        playable_locations = game.get_playable_locations(color)

        # Format the board and make predictions
        inp = self.board_to_input(color, game.board)
        pred_moves = self.model.predict(inp)
        pred_moves = pred_moves.reshape(9, 9)

        # Non-playable cells keep a low dummy score so they lose the argmax.
        dummy_value = -10
        potential_moves = np.full((9, 9), dummy_value, dtype=float)
        # Remember which cells are playable: after the softmax the dummy
        # score is no longer recognisable, so comparing against it (as the
        # previous version did) could never trigger the pass fallback.
        playable_mask = np.zeros((9, 9), dtype=bool)
        for move in playable_locations:
            if move.is_pass:
                continue
            loc = move.to_matrix_location()
            potential_moves[loc[0]][loc[1]] = pred_moves[loc[0]][loc[1]]
            playable_mask[loc[0]][loc[1]] = True

        potential_moves = self.softmax(potential_moves)

        row, col = np.unravel_index(potential_moves.argmax(),
                                    potential_moves.shape)

        move = Move(col=col, row=row)
        if not playable_mask[move.to_matrix_location()]:
            move = Move(is_pass=True)

        return move

Ejemplo n.º 3

0

Mostrar archivo

    def _genmove(self, color, game, flat_board):
        """Pick the move that raises the model's prediction for `color` most.

        The current position is scored first; then every playable non-pass
        move is tried on a copy of the board and scored again. The move with
        the largest gain over the current score is returned; if no move
        yields a strictly positive gain, a pass is returned.
        """
        baseline_input = self.board_to_input(
            flat_board.reshape(1, len(flat_board)))
        current_pred = self.model.predict(baseline_input)

        if color == 'b':
            my_index, my_value = 0, BLACK
        else:
            my_index, my_value = 1, WHITE
        my_pred = current_pred[0, my_index]

        # Cells that are never evaluated keep a score of zero.
        results = np.zeros(game.board.shape)
        for candidate in game.get_playable_locations(color):
            if not candidate.is_pass:
                cell = candidate.to_matrix_location()
                trial_board = copy.deepcopy(game.board)
                trial_board.place_stone_and_capture_if_applicable_default_values(
                    cell, my_value)
                trial_pred = self.model.predict(
                    self.board_to_input(trial_board.flatten()))
                results[cell] = trial_pred[0, my_index]

        # Turn absolute scores into gains relative to the current position.
        results -= my_pred

        row, col = np.unravel_index(results.argmax(), results.shape)
        chosen = Move(col=col, row=row)
        if results[chosen.to_matrix_location()] <= 0:
            return Move(is_pass=True)
        return chosen

Ejemplo n.º 4

0

Mostrar archivo

    def genmove(self, color, game) -> Move:
        """Choose the move with the best softmaxed outcome prediction.

        Scores the current board, then scores the board after placing a
        stone on each playable location (no capture logic is applied to the
        test board). Returns the location with the largest improvement over
        the current score, or a pass if no location improves it.
        """
        if color == 'b':
            my_index, my_value = 0, BLACK
        else:
            my_index, my_value = 1, WHITE

        # We're still interested in the playable locations
        playable_locations = game.get_playable_locations(color)

        current_pred = self.softmax(
            self.model.predict(self.board_to_input(color, game.board)))
        my_pred = current_pred[0, my_index]

        results = np.zeros(game.board.shape)
        for candidate in playable_locations:
            if not candidate.is_pass:
                cell = candidate.to_matrix_location()
                trial_board = copy.deepcopy(game.board)
                trial_board[cell] = my_value
                trial_pred = self.softmax(
                    self.model.predict(
                        self.board_to_input(color, trial_board)))
                results[cell] = trial_pred[0, my_index]

        # `results` becomes the per-move prediction minus the current
        # prediction. A negative entry means the move makes our predicted
        # outcome worse; the higher the entry, the better the move.
        results -= my_pred

        row, col = np.unravel_index(results.argmax(), results.shape)
        chosen = Move(col=col, row=row)
        if results[chosen.to_matrix_location()] <= 0:
            return Move(is_pass=True)
        return chosen

Ejemplo n.º 5

0

Mostrar archivo

    def genmove(self, color, game) -> Move:
        """Return the playable move with the highest policy output.

        The pass entry (index 81 of the flattened policy) is the baseline:
        a board move is only chosen if its policy value is strictly greater
        than the best value seen so far, starting from the pass value.
        """
        board = np.array(game.board)
        if color == 'w':
            my_value = WHITE
        else:
            my_value = BLACK

        inp = self.generate_input(board, my_value)
        if self.verbose:
            print(inp)
        policy = self.model(inp).data.numpy().flatten()

        # Start from "pass" and let any better-scored playable move win.
        best_move = Move(is_pass=True)
        best_prob = policy[81]
        for candidate in game.get_playable_locations(color):
            if self.verbose:
                print(candidate)
            if candidate.is_pass:
                continue
            candidate_prob = policy[candidate.to_flat_idx()]
            if candidate_prob > best_prob:
                best_move, best_prob = candidate, candidate_prob

        return best_move

Ejemplo n.º 6

0

Mostrar archivo

    def _genmove(self, color, game, flat_board):
        """Return the highest-scored valid move, or a pass.

        Scores of invalid locations are zeroed before taking the arg-max.
        A pass is returned when the arg-max is index 81 or when the winning
        score is exactly zero.
        """
        model_input = self.board_to_input(
            flat_board.reshape(1, len(flat_board)))
        scores = self.model.predict(model_input)[0]

        # Invalid moves get a score of 0 so they cannot beat a valid one.
        for invalid in game.get_invalid_locations(color):
            scores[invalid.to_flat_idx()] = 0

        best_idx = np.argmax(scores)
        if best_idx != 81 and scores[best_idx] != 0:
            return Move.from_flat_idx(best_idx)
        return Move(is_pass=True)

Ejemplo n.º 7

0

Mostrar archivo

    def open(self):
        """Create the window, run the event loop, then exit the process.

        Sets up the pygame display with a 'Pass' button and a turn label,
        then polls events until a QUIT event clears ``self.running``.
        A mouse release is converted to the nearest board intersection and
        forwarded to the controller when it lies on the board. The method
        does not return: it ends with ``sys.exit(0)``.
        """
        pygame.init()
        self.running = True
        self.screen = pygame.display.set_mode(window_size)
        pygame.display.set_caption('Go')

        pass_button = Button(210, 530, 80, 40, 'Pass', self.screen,
                             self.send_pass_move)
        self.buttons.append(pass_button)
        turn_label = Label(100, 30, 300, 40, self.get_turn_label_text,
                           self.screen)
        self.labels.append(turn_label)
        self.render()

        while self.running:
            event = pygame.event.poll()
            if event.type == pygame.MOUSEBUTTONUP:
                x, y = event.pos
                # Snap the pixel position to the nearest grid intersection.
                col = int(round(
                    (x - board_top_left_coord[0]) / self.cell_size))
                row = int(round(
                    (y - board_top_left_coord[1]) / self.cell_size))
                on_board = (0 <= col < self.game.size
                            and 0 <= row < self.game.size)
                if on_board:
                    self.controller.receive_move_from_gui(Move(col, row))
                for button in self.buttons:
                    button.check_mouse_released()
            elif event.type == pygame.QUIT:
                self.running = False

            # Hover state and redraw happen on every loop iteration.
            for button in self.buttons:
                button.is_mouse_over_btn()
            self.render()

        pygame.quit()
        sys.exit(0)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: PolicyBot.py Proyecto: benjaminaaron/GO_DILab

    def _genmove(self, color, game, flat_board):
        """Generate a move - PolicyBot logic.

        The network is asked once for a score per move; the playable move
        with the highest score is returned. Passing (index 81 of the
        prediction) is the default and is only replaced by a board move
        whose score is strictly higher.
        """
        # NOTE(review): `color` is converted to the board constant here and
        # that constant is also what get_playable_locations receives below.
        color = WHITE if color == 'w' else BLACK
        board_row = flat_board.reshape(1, len(flat_board))

        # 1. Generate move probabilities
        prediction = self.model.predict(
            self.generate_nn_input(board_row, color))[0]

        # 2. Walk over the valid moves and keep the best one. A plain loop
        # is slower than a vectorised lookup but easy to follow.
        best_move = Move(is_pass=True)
        best_move_prob = prediction[81]
        for candidate in game.get_playable_locations(color):
            if candidate.is_pass:
                continue
            candidate_prob = prediction[candidate.to_flat_idx()]
            if candidate_prob > best_move_prob:
                best_move, best_move_prob = candidate, candidate_prob

        return best_move

Ejemplo n.º 9

0

Mostrar archivo

def replay_game(sgf_line, func):
    """Replay one SGF game and collect ``func``'s output after every move.

    Only the first game of the parsed collection is used. Games whose
    result property ('RE') does not start with 'B' or 'W' are skipped.

    Returns:
        ``None`` for skipped games, otherwise the per-move results of
        ``func(game, player_color, black_win)`` stacked with ``np.stack``.
    """
    collection = sgf.parse(sgf_line)

    # This all only works if the SGF contains only one game
    game_tree = collection.children[0]
    game_properties = game_tree.nodes[0].properties

    result = game_properties['RE'][0]
    if not result.startswith(('B', 'W')):
        return None
    black_win = result.startswith('B')

    game = Game(game_properties)
    per_move = []
    # The first node holds the game properties; moves start at the second.
    for node in game_tree.nodes[1:]:
        color_key = list(node.properties)[0]
        player = color_key.lower()
        move = Move.from_sgf(str(node.properties[color_key][0]))
        game.play(move, player, checking=False)
        per_move.append(func(game, player, black_win))
    return np.stack(per_move)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: NNBot.py Proyecto: benjaminaaron/GO_DILab

 def genmove(self, color, game) -> Move:
     """Return the network's preferred move, or a pass.

     The prediction's slot 0 is treated as "pass"; the remaining slots are
     board positions in flat (row-major) order.

     Args:
         color: Colour to move, forwarded to ``game.get_invalid_locations``.
         game: Current game; provides ``board`` and ``size``.

     Returns:
         A pass move when the overall arg-max is the pass slot, otherwise
         the ``Move`` at the best-scored location after invalid locations
         were zeroed.
     """
     nn_input_board = self.flatten_matrix(game.board)
     predict = self.model.predict(np.array([nn_input_board]))
     # Compare by value: `np.argmax` returns a NumPy integer, so the former
     # identity test (`is 0`) was never true and the bot could never pass.
     if np.argmax(predict) == 0:
         return Move(is_pass=True)

     board = predict[0][1:]  # strip away the pass-slot at pos zero
     # set all invalid locations to 0 to avoid them being chosen
     for move in game.get_invalid_locations(color):
         board[move.to_flat_idx(game.size)] = 0
     row, col = divmod(int(np.argmax(board)), game.size)
     return Move(col=int(col), row=int(row))

Ejemplo n.º 11

0

Mostrar archivo

 def genmove(self, color, game) -> Move:
     """Return the network's preferred valid move, or a pass.

     Index 81 of the prediction is treated as "pass"; indices 0-80 are
     board positions in flat (row-major) order.

     Args:
         color: Colour to move, forwarded to ``self.flatten_matrix`` and
             ``game.get_invalid_locations``.
         game: Current game; provides ``board`` and ``size``.

     Returns:
         A pass move when the overall arg-max is the pass slot or when
         every board location was masked as invalid; otherwise the ``Move``
         at the best-scored remaining location.
     """
     input_board = self.flatten_matrix(game.board, color)
     pred = self.model.predict(np.array([input_board]).reshape(1, -1))
     # Compare by value: `np.argmax` returns a NumPy integer, so the former
     # identity test (`is 81`) was never true.
     if np.argmax(pred) == 81:
         return Move(is_pass=True)

     board = pred[0][0:81]
     # set all invalid locations to -1 to avoid them being chosen
     for move in game.get_invalid_locations(color):
         board[move.to_flat_idx(game.size)] = -1
     max_idx = int(np.argmax(board))
     # if all moves are invalid, play pass (the best remaining score is
     # then the -1 marker written above)
     if board[max_idx] == -1:
         return Move(is_pass=True)
     row, col = divmod(max_idx, game.size)
     return Move(col=int(col), row=int(row))

Ejemplo n.º 12

0

Mostrar archivo

Archivo: train_atari.py Proyecto: benjaminaaron/GO_DILab

def check_dead_group(game, col_coord, row_coord):
    """Return True when no cell adjacent to the given stone is empty.

    Only the direct neighbours of the single location (col_coord,
    row_coord) are inspected, as returned by the board's
    ``get_adjacent_coords``.
    """
    board = game.board
    origin = Move(col=col_coord, row=row_coord).to_matrix_location()
    return not any(board[neighbor] == EMPTY
                   for neighbor in board.get_adjacent_coords(origin))

Ejemplo n.º 13

0

Mostrar archivo

Archivo: NNBot_ben1.py Proyecto: benjaminaaron/GO_DILab

    def _genmove(self, color, game, flat_board):
        """Return the network's preferred valid move, or a pass.

        The last entry of the prediction vector is treated as the pass slot
        (it is the entry removed by ``predict[:-1]`` below).

        Args:
            color: Colour to move, forwarded to
                ``game.get_invalid_locations``.
            game: Current game; provides ``size``.
            flat_board: 1-D board array fed to the model as a single row.

        Returns:
            A pass move when the pass slot has the highest score or when the
            best remaining board score is 0 (i.e. only masked/invalid
            locations are left); otherwise the best-scored board move.
        """
        flat_board = flat_board.reshape(1, len(flat_board))
        predict = self.model.predict(flat_board)[0]
        # Derive the pass index from the vector itself so it always matches
        # the slot stripped below, instead of a hard-coded 82 that is out of
        # range for an 82-element (81 cells + pass) prediction.
        pass_idx = len(predict) - 1
        if np.argmax(predict) == pass_idx:
            return Move(is_pass=True)

        board = predict[:-1]  # strip away the trailing pass-slot
        # set all invalid locations to 0 to avoid them being chosen
        for move in game.get_invalid_locations(color):
            board[move.to_flat_idx(game.size)] = 0
        max_idx = np.argmax(board)

        # If this move is invalid pass!
        if board[max_idx] == 0:
            return Move(is_pass=True)

        return Move.from_flat_idx(max_idx)

Ejemplo n.º 14

0

Mostrar archivo

Archivo: bot.py Proyecto: benjaminaaron/GO_DILab

    def genmove(self, color, game) -> Move:
        """Pick a move using either the policy or the value head.

        With ``self.logic == 'policy'`` the playable move with the highest
        policy output wins, starting from the pass entry (index 81). With
        ``self.logic == 'value'`` each playable move is tried on a copy of
        the board and the move with the highest value output wins, starting
        from the value of the current position. Passing is the default in
        both modes.
        """
        board = np.array(game.board)
        my_value = WHITE if color == 'w' else BLACK
        inp = self.generate_input(board, my_value)
        if self.verbose:
            print(inp)
        policy_out, value_out = self.model(inp)
        policy = policy_out.data.numpy().flatten()
        value = value_out.data.numpy().flatten()

        playable_locations = game.get_playable_locations(color)

        # Default: passing (both modes share the same pass move object)
        pass_move = Move(is_pass=True)
        best_policy_move = pass_move
        best_value_move = pass_move
        best_policy_prob = policy[81]
        best_value = value

        for candidate in playable_locations:
            if self.verbose:
                print(candidate)
            if candidate.is_pass:
                continue

            if self.logic == 'value':
                # Play the candidate on a test board
                trial_board = copy.deepcopy(game.board)
                trial_board.place_stone_and_capture_if_applicable_default_values(
                    candidate.to_matrix_location(), my_value)

                # The resulting position is evaluated from our own point of
                # view, even though it would be the opponent's turn.
                trial_input = self.generate_input(np.array(trial_board),
                                                  my_value)
                _, trial_value = self.model(trial_input)
                trial_value = trial_value.data.numpy().flatten()
                if trial_value > best_value:
                    best_value_move = candidate
                    best_value = trial_value
            elif self.logic == 'policy':
                candidate_prob = policy[candidate.to_flat_idx()]
                if candidate_prob > best_policy_prob:
                    best_policy_move = candidate
                    best_policy_prob = candidate_prob

        if self.logic == 'policy':
            out_move = best_policy_move
        if self.logic == 'value':
            out_move = best_value_move

        return out_move

Ejemplo n.º 15

0

Mostrar archivo

Archivo: MovePredictionBot.py Proyecto: benjaminaaron/GO_DILab

    def _genmove(self, color, game, flat_board):
        """Return the best-scored valid move, or a pass.

        The model input is the board with every entry mapped through
        ``self.replace_entry`` plus one trailing feature: 1 when ``color``
        equals ``BLACK``, -1 otherwise. Invalid locations are set to -1 in
        the prediction; a pass is returned when the arg-max is index 81 or
        when the winning score is exactly -1.
        """
        board_rows = flat_board.reshape(1, len(flat_board)).tolist()
        features = []
        for row in board_rows:
            for entry in row:
                features.append(self.replace_entry(entry))
        features.append(1 if color == BLACK else -1)

        pred = self.model.predict(np.array([features]).reshape(1, -1))[0]

        for invalid in game.get_invalid_locations(color):
            pred[invalid.to_flat_idx(game.size)] = -1

        best_idx = np.argmax(pred)
        if best_idx == 81 or pred[best_idx] == -1:
            return Move(is_pass=True)
        return Move.from_flat_idx(best_idx)

Ejemplo n.º 16

0

Mostrar archivo

 def genmove(color, game) -> Move:
     """Ask on the console for a move until the game accepts one.

     Args:
         color: Colour the human plays; forwarded to ``game.play``.
         game: Current game; ``game.size`` is used to parse the GTP string
             and ``game.play(..., testing=True)`` to validate the move.

     Returns:
         The first entered move that parses and passes the test play.
     """
     # NOTE(review): the original print statement was garbled in this copy
     # (two message strings fused by '******'), which is a syntax error. The
     # two messages are restored as separate handlers; the exception type of
     # the second handler is not visible here and is assumed - confirm
     # against the upstream file.
     move = None
     while move is None:
         print('\nsubmit your move:')
         # Read outside the try-block so EOF on stdin propagates instead of
         # being retried forever.
         move_str = input()
         try:
             move = Move().from_gtp(move_str, game.size)
             game.play(move, color, testing=True)
         except InvalidMove_Error:
             move = None
             print('\ninvalid move, choose another location or "pass":')
         except Exception:
             # Anything else is treated as unparsable input.
             move = None
             print('\nbad input, retry or "pass":')
     return move

Ejemplo n.º 17

0

Mostrar archivo

Archivo: ValueBot.py Proyecto: benjaminaaron/GO_DILab

    def _genmove(self, color, game, flat_board):
        """Generate a move - ValueBot logic.

        The logic of this bot is basically:
        1. Evaluate the current model output for this position
        2. Evaluate the model output after each playable move
        3. Play the move with the highest output if it beats the current
           one, otherwise pass

        Args:
            color: 'w' for white; any other value is treated as black.
            game: Current game; provides ``board`` and the playable
                locations.
            flat_board: 1-D board array for the current position.

        Returns:
            The best improving ``Move``, or a pass move if none improves on
            the current prediction.

        Raises:
            AssertionError: If the model output for the current position
                does not sum to 1 (within floating-point tolerance).
        """
        # NOTE(review): from here on `color` is the board constant, and that
        # constant is also what get_playable_locations receives below.
        color = WHITE if color == 'w' else BLACK
        flat_board = flat_board.reshape(1, len(flat_board))
        my_value = color

        # 1. Get current Win Probability
        inp = self.generate_nn_input(flat_board, color)
        current_prob = self.model.predict(inp)
        # Tolerant comparison: a float sum is rarely exactly 1.0, so the
        # former `== 1` check could fail on perfectly valid outputs.
        total_prob = np.sum(current_prob)
        assert np.isclose(total_prob, 1), total_prob

        # 2. Evaluate all possible moves
        best_win_prob = current_prob[0, 0]
        best_move = Move(is_pass=True)

        playable_locations = game.get_playable_locations(color)
        for move in playable_locations:
            if move.is_pass:
                continue

            # Play the move and evaluate the resulting board
            test_board = copy.deepcopy(game.board)
            test_board.place_stone_and_capture_if_applicable_default_values(
                move.to_matrix_location(), my_value)
            inp = self.generate_nn_input(test_board.flatten(), color)
            pred_result = self.model.predict(inp)[0, 0]

            # 3. Keep the move only if it strictly raises the prediction
            if pred_result > best_win_prob:
                best_move = move
                best_win_prob = pred_result

        return best_move

Ejemplo n.º 18

0

Mostrar archivo

Archivo: GTPcontroller.py Proyecto: benjaminaaron/GO_DILab

    def run(self):
        """Drive a game between the two players, then exit the process.

        Each turn asks the current player for a move via 'genmove', relays
        it to the other player via 'play', applies it to the game, sleeps
        for ``self.end_of_turn_sleep_time`` and swaps the players. A
        response starting with '?' is logged as an error and aborts the
        loop. Afterwards 'quit' is broadcast, the final board and result
        are printed, and the method ends with ``sys.exit(0)``.
        """
        self.game.start()
        while self.game.is_running:
            print('\nnext turn\n')
            response = self.wait_for_response(
                self.current_player,
                f'genmove {self.current_player.color}')
            if response.startswith('?'):
                self.log_and_print(
                    f'player {self.current_player.name}'
                    f' responded with an error, aborting the game: '
                    f'"{response[2:]}"')
                break

            move = response[2:]  # strip away the "= "
            self.send_to_player(
                self.other_player,
                f'play {self.current_player.color} {move}')

            parsed_move = Move().from_gtp(move, self.game.size)
            self.game.play(parsed_move, self.current_player.color)
            print('\n' + str(self.game))

            time.sleep(self.end_of_turn_sleep_time)

            # swap players for next turn
            if self.current_player == self.player1:
                self.current_player, self.other_player = (self.player2,
                                                          self.player1)
            else:
                self.current_player, self.other_player = (self.player1,
                                                          self.player2)

        self.broadcast('quit')
        print('\n' + str(self.game))
        print('Final result:', self.game.evaluate_points())
        sys.exit(0)

Ejemplo n.º 19

0

Mostrar archivo

    def genmove(self, color, game) -> Move:
        """Return the playable move the network scores highest, or a pass.

        Args:
            color: Colour to move, forwarded to
                ``game.get_playable_locations`` and ``self.board_to_input``.
            game: Current game; ``game.board`` is fed to the network.

        Returns:
            The ``Move`` at the arg-max of the softmaxed scores. A pass move
            is returned instead when that cell is not a playable location
            or when its softmaxed score is below ``1 / 81 + 0.0001``.
        """
        # We're still interested in the playable locations
        playable_locations = game.get_playable_locations(color)

        # Format the board and make predictions
        inp = self.board_to_input(color, game.board)
        # Logging applies `msg % args`; without a placeholder the extra
        # argument triggers a formatting error when the record is emitted.
        # NOTE(review): assumes bot_logger is a standard logging.Logger.
        bot_logger.debug('Input shape: %s', inp.shape)
        bot_logger.debug('Input: %s', inp)
        pred_moves = self.model.predict(inp)
        bot_logger.debug('This worked')
        bot_logger.debug('Predicted moves: %s', pred_moves)

        pred_moves = pred_moves.reshape(9, 9)
        # Non-playable cells keep a low dummy score so they lose the argmax.
        dummy_value = -10
        potential_moves = np.full((9, 9), dummy_value, dtype=float)
        # Remember which cells are playable: after the softmax the dummy
        # score is no longer recognisable, so comparing against it (as the
        # previous version did) could never be true.
        playable_mask = np.zeros((9, 9), dtype=bool)
        for move in playable_locations:
            if move.is_pass:
                continue
            loc = move.to_matrix_location()
            potential_moves[loc[0]][loc[1]] = pred_moves[loc[0]][loc[1]]
            playable_mask[loc[0]][loc[1]] = True

        potential_moves = self.softmax(potential_moves)

        row, col = np.unravel_index(potential_moves.argmax(),
                                    potential_moves.shape)

        move = Move(col=col, row=row)
        chosen = move.to_matrix_location()
        # Pass if the winner is unplayable or barely above a uniform 1/81.
        if (not playable_mask[chosen]
                or potential_moves[chosen] < (1 / 81 + 0.0001)):
            move = Move(is_pass=True)

        return move

Ejemplo n.º 20

0

Mostrar archivo

 def send_pass_move(self):
     """Hand a pass move to the controller as a move coming from the GUI."""
     pass_move = Move(is_pass=True)
     self.controller.receive_move_from_gui(pass_move)

Ejemplo n.º 21

0

Mostrar archivo

Archivo: train_atari.py Proyecto: benjaminaaron/GO_DILab

def main():
    """Train a small Q-network to capture one fixed white stone.

    Every episode starts from a board seeded by ``init_game`` with a stone
    at (col 1, row 6). Black moves are chosen epsilon-greedily; an episode
    ends as soon as ``check_dead_group`` reports the seeded stone has no
    empty neighbour (reward 10). Every other move costs -1. After each move
    the network is fitted on a single-sample Q-learning target, and the
    trained model is saved to 'test_model_1.h5' at the end.

    Raises:
        IndexError: If an exploration step finds no playable non-pass move.
    """
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform',
                    activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))

    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    col_coord, row_coord = 1, 6  # random.randint(0, 8), random.randint(0, 8)
    epochs = 10
    gamma = 0.9
    epsilon = 1
    for i in range(epochs):

        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        # game in progress
        while status == 1:
            # `game` does not change until the end of the iteration, so its
            # network input is computed once and reused.
            state = board2input(game, 'b')
            qval = model.predict(state, batch_size=1)
            new_game = copy.deepcopy(game)
            if random.random() < epsilon:
                # Explore: uniform choice among the non-pass moves.
                # Filtering up front avoids the endless re-draw loop the
                # old rejection sampling hit when only "pass" was playable.
                valid_moves = game.get_playable_locations('b')
                candidates = [m for m in valid_moves if not m.is_pass]
                move = random.choice(candidates)
            else:
                # Exploit: best-scored cell that is still empty.
                temp_qval = copy.copy(qval)
                move = Move.from_flat_idx(np.argmax(temp_qval))
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # arbit low value. To get to second max value.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = Move.from_flat_idx(np.argmax(temp_qval))
                    location = move.to_matrix_location()
            new_game.play(move, 'b')
            move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                # terminal state: the target is the reward itself
                reward = 10
                status = 0
                update = reward
            else:
                # non-terminal state: bootstrap from the best Q-value of
                # the NEW state. The previous code queried the old state
                # and read the value of the action just taken, which is not
                # the Q-learning target the comments described.
                reward = -1
                new_q = model.predict(board2input(new_game, 'b'),
                                      batch_size=1)
                update = reward + gamma * np.max(new_q)

            # target = current predictions with only the taken action's
            # value replaced by the update
            y = np.zeros((1, 81))
            y[:] = qval
            y[0][move] = update
            # `epochs` is the Keras 2 keyword (the layers above already use
            # Keras 2 argument names); `nb_epoch` is its deprecated name.
            model.fit(state,
                      y,
                      batch_size=1,
                      epochs=1,
                      verbose=0)
            game = copy.copy(new_game)
        print('game ' + str(i) + ' ends here')
        # anneal exploration linearly, but never below 0.1
        if epsilon > 0.1:
            epsilon -= (1 / epochs)

    # NOTE(review): the path is relative to the working directory.
    model.save('test_model_1.h5')

Ejemplo n.º 22

0

Mostrar archivo

Archivo: train_atari.py Proyecto: benjaminaaron/GO_DILab

def init_game(game, col_coord, row_coord):
    """Play a white stone at (col_coord, row_coord) and return the game."""
    game.play(Move(col=col_coord, row=row_coord), 'w')
    return game

Ejemplo n.º 23

0

Mostrar archivo

Archivo: train_harder_atari.py Proyecto: benjaminaaron/GO_DILab

def main():
    """Train a Q-network with experience replay to capture a white stone.

    Every episode seeds a white stone at a random location via
    ``init_game``. Black moves are chosen epsilon-greedily; an episode ends
    when ``check_dead_group`` reports the seeded stone has no empty
    neighbour (reward 50). Every other move yields -1. Transitions are kept
    in a ring buffer of 100 entries; once it is full, each step trains on a
    random minibatch of 50 transitions. Checkpoints are written every 5000
    episodes and the final model is saved at the end.

    Raises:
        IndexError: If an exploration step finds no playable non-pass move.
    """
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform',
                    activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))

    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    epochs = 50000
    gamma = 0.975
    epsilon = 1
    batchSize = 50
    buffer = 100
    replay = []
    h = 0
    for i in range(epochs):
        col_coord, row_coord = random.randint(0, 8), random.randint(0, 8)
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        # game in progress; the loop ends right after a reward of 50, so a
        # move is selected on every iteration (the old `reward == -1` guard
        # was always true here).
        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            new_game = copy.deepcopy(game)
            if random.random() < epsilon:
                # Explore: uniform choice among the non-pass moves.
                # Filtering up front avoids the endless re-draw loop the
                # old rejection sampling hit when only "pass" was playable.
                # NOTE(review): BLACK (not 'b') is passed here, as in the
                # original - confirm which form the game API expects.
                valid_moves = game.get_playable_locations(BLACK)
                candidates = [m for m in valid_moves if not m.is_pass]
                move = random.choice(candidates)
            else:
                # Exploit: best-scored cell that is still empty.
                temp_qval = copy.copy(qval)
                move = Move.from_flat_idx(np.argmax(temp_qval))
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # arbit low value. To get to second max value.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = Move.from_flat_idx(np.argmax(temp_qval))
                    location = move.to_matrix_location()
            new_game.play(move, 'b')
            move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 50
            else:
                reward = -1

            # experience replay storage
            experience = (board2input(game, 'b'), move, reward,
                          board2input(new_game, 'b'))
            if len(replay) < buffer:
                replay.append(experience)
            else:
                # overwrite the oldest slot (ring buffer)
                h = (h + 1) % buffer
                replay[h] = experience

                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for m_game, m_move, m_reward, m_new_game in minibatch:
                    oldqval = model.predict(m_game, batch_size=1)
                    y = np.zeros(81)
                    y[:] = oldqval
                    if m_reward == 50:
                        # terminal transition: target is the reward itself
                        update = m_reward
                    else:
                        # Bootstrap from the best Q-value of the stored
                        # NEXT state. The previous code reused the old
                        # state's value for the taken action and never
                        # read the next state it had stored.
                        new_q = model.predict(m_new_game, batch_size=1)
                        update = m_reward + gamma * np.max(new_q)
                    y[m_move] = update
                    X_train.append(m_game)
                    y_train.append(y)
                # One row per sample: the reshape is a no-op for 1-D
                # samples and drops the extra axis if board2input returns
                # a (1, n) array (presumably the case, given the
                # batch_size=1 predict calls - TODO confirm).
                X_train = np.stack(X_train).reshape(len(X_train), -1)
                y_train = np.stack(y_train)
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=0)
            game = copy.copy(new_game)
            if reward == 50:
                status = 0
        print('game ' + str(i) + ' ends here')
        # anneal exploration linearly, but never below 0.1
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
        # periodic checkpoint
        if i % 5000 == 0 and i > 0:
            name = 'src/learn/RL_Atari/hard_atari_' + str(i) + '.h5'
            model.save(name)

    model.save('src/learn/RL_Atari/test_model_final.h5')