Example 1
# Assumed context: keras, numpy as np, copy, and the project's Game, Move, EMPTY,
# init_game and board2input helpers are imported elsewhere in this module.
def main():
    # Presumably the model saved by the first training loop in Example 5.
    model = keras.models.load_model('src/learn/RL_Atari/test_model_1.h5')
    game = Game()
    col_coord, row_coord = 1, 6
    game = init_game(game, col_coord, row_coord)
    print('new game')
    print(game)
    k = 0
    #print(model.predict(board2input(game,'b'),batch_size=1))
    #time.sleep(40)
    while k < 4:
        qval = model.predict(board2input(game, 'b'), batch_size=1)
        #print(qval)
        #time.sleep(100)
        temp_qval = copy.copy(qval)
        move = np.argmax(qval)
        #print(move)
        move = Move.from_flat_idx(move)
        location = move.to_matrix_location()
        while game.board[location] != EMPTY:
            # Knock out the current best entry with an arbitrarily low value so
            # np.argmax falls through to the next-highest Q-value.
            temp_qval[0][np.argmax(temp_qval)] = -100
            move = np.argmax(temp_qval)
            move = Move.from_flat_idx(move)
            location = move.to_matrix_location()
        game.play(move, 'b')
        print(game)
        k = k + 1
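The board2input helper used above is not defined in any of these examples. Judging from the input_shape=(243,) of the networks in Example 5, a plausible sketch, assuming a 9x9 board and the project's EMPTY/BLACK/WHITE stone constants (WHITE in particular is an assumption), could look like this:

import numpy as np

def board2input(game, color):
    # Hypothetical encoding: one 81-entry plane each for the player's stones,
    # the opponent's stones, and empty points, flattened to shape (1, 243).
    board = np.asarray(game.board)
    own = BLACK if color == 'b' else WHITE
    opp = WHITE if color == 'b' else BLACK
    planes = np.concatenate([
        (board == own).astype(float).ravel(),
        (board == opp).astype(float).ravel(),
        (board == EMPTY).astype(float).ravel(),
    ])
    return planes.reshape(1, -1)

The project's actual encoding may differ; the only hard constraint from the code in these examples is that it produces 243 inputs per position.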
Example 2
    def _genmove(self, color, game, flat_board):
        flat_board = flat_board.reshape(1, len(flat_board))

        X = self.board_to_input(flat_board)
        predict = self.model.predict(X)[0]

        # Set invalid moves to 0
        for move in game.get_invalid_locations(color):
            flat_idx = move.to_flat_idx()
            predict[flat_idx] = 0

        max_idx = np.argmax(predict)
        # Index 81 is the pass slot; also pass if every board move has been zeroed out.
        if max_idx == 81 or predict[max_idx] == 0:
            return Move(is_pass=True)
        else:
            return Move.from_flat_idx(max_idx)
Example 3
    def _genmove(self, color, game, flat_board):
        flat_board = flat_board.reshape(1, len(flat_board))
        predict = self.model.predict(flat_board)[0]
        max_idx = np.argmax(predict)
        if max_idx == 81:  # index 81 (the last of the 82 outputs) is the pass slot
            return Move(is_pass=True)
        else:
            board = predict[:-1]  # strip away the pass slot
            # set all invalid locations to 0 to avoid them being chosen
            for move in game.get_invalid_locations(color):
                flat_idx = move.to_flat_idx(game.size)
                board[flat_idx] = 0
            max_idx = np.argmax(board)

            # If even the best remaining move was zeroed out, nothing valid is left: pass.
            if board[max_idx] == 0:
                return Move(is_pass=True)

            return Move.from_flat_idx(max_idx)
Example 4
    def _genmove(self, color, game, flat_board):
        flat_board = flat_board.reshape(1, len(flat_board))
        input_board = flat_board.tolist()
        input_board = [
            self.replace_entry(entry) for row in input_board for entry in row
        ]
        # Append a final input feature encoding whose turn it is: 1 for black, -1 for white.
        if color == BLACK:
            input_board.append(1)
        else:
            input_board.append(-1)
        pred = self.model.predict(np.array([input_board]).reshape(1, -1))[0]

        for move in game.get_invalid_locations(color):
            flat_idx = move.to_flat_idx(game.size)
            pred[flat_idx] = -1
        max_idx = np.argmax(pred)
        if max_idx == 81:  # index 81 is the pass slot
            return Move(is_pass=True)
        elif pred[max_idx] == -1:
            # Even the best remaining entry was marked invalid, so there is no playable move: pass.
            return Move(is_pass=True)
        else:
            return Move.from_flat_idx(max_idx)
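The _genmove variants above (and Example 1) share a flat-index convention: indices 0-80 address the 81 points of the 9x9 board, and index 81 is the pass move. A minimal sketch of the mapping that Move.to_flat_idx / Move.from_flat_idx presumably implement (the row-major ordering is an assumption):

BOARD_SIZE = 9
PASS_IDX = BOARD_SIZE * BOARD_SIZE  # 81

def location_to_flat(row, col):
    # Hypothetical row-major layout; the project's Move class may order the axes differently.
    return row * BOARD_SIZE + col

def flat_to_location(flat_idx):
    return divmod(flat_idx, BOARD_SIZE)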
Example 5
def main():
    # First training loop: epsilon-greedy Q-learning on a single fixed capture target,
    # with a network update after every move (no experience replay).
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform',
                    activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))

    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    col_coord, row_coord = 1, 6  #random.randint(0, 8), random.randint(0, 8)
    epochs = 10
    gamma = 0.9
    epsilon = 1
    for i in range(epochs):

        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        # game in progress
        while (status == 1):
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if random.random() < epsilon:
                valid_moves = game.get_playable_locations('b')
                move = random.choice(valid_moves)
                # Re-draw until a non-pass move comes up.
                while move.is_pass:
                    move = random.choice(valid_moves)
                new_game = copy.deepcopy(game)
                new_game.play(move, 'b')
                move = move.to_flat_idx()
            else:
                temp_qval = copy.copy(qval)
                move = (np.argmax(temp_qval))
                move = Move.from_flat_idx(move)
                new_game = copy.deepcopy(game)
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # Knock out the current best entry with an arbitrarily low value so
                    # np.argmax falls through to the next-highest Q-value.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = np.argmax(temp_qval)
                    move = Move.from_flat_idx(move)
                    location = move.to_matrix_location()
                new_game.play(move, 'b')
                move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 10  # target group captured: terminal state
                status = 0
            else:
                reward = -1

            # Get the max Q-value over the successor state (note: predict on new_game, not game).
            newQ = model.predict(board2input(new_game, 'b'), batch_size=1)
            maxQ = np.max(newQ[0])
            # Q-learning target: the raw reward for a terminal state, otherwise reward + gamma * maxQ.
            if reward == -1:  # non-terminal state
                update = reward + (gamma * maxQ)
            else:  # terminal state
                update = reward
            # Copy the current Q-values and overwrite the chosen action with the target.
            y = np.zeros((1, 81))
            y[:] = qval[:]
            y[0][move] = update
            # Fit on this single position (nb_epoch was renamed to epochs in Keras 2).
            model.fit(board2input(game, 'b'),
                      y,
                      batch_size=1,
                      epochs=1,
                      verbose=0)
            game = copy.copy(new_game)
        print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
            #print ('epsilon : ' + str(epsilon))

    model.save('test_model_1.h5')  # presumably the model loaded back in Example 1
def main():
    # Second training loop: a random capture target each episode, epsilon-greedy play,
    # and mini-batch updates drawn from an experience-replay buffer.
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform',
                    activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu'))
    model.add(
        Dense(units=81, kernel_initializer='uniform', activation='linear'))

    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    epochs = 50000
    gamma = 0.975
    epsilon = 1
    batchSize = 50
    buffer = 100
    replay = []
    h = 0
    for i in range(epochs):
        col_coord, row_coord = random.randint(0, 8), random.randint(0, 8)
        #print(col_coord,row_coord)
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1
        reward = -1  # by default at game start
        # game in progress
        while (status == 1):
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if reward == -1:
                if random.random() < epsilon:
                    valid_moves = game.get_playable_locations(BLACK)
                    if len(valid_moves) == 0:
                        print('end it')  # debug guard: no playable locations at all
                    move = random.choice(valid_moves)
                    # Re-draw until a non-pass move comes up.
                    while move.is_pass:
                        move = random.choice(valid_moves)
                    new_game = copy.deepcopy(game)
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()
                else:
                    temp_qval = copy.copy(qval)
                    move = (np.argmax(temp_qval))
                    move = Move.from_flat_idx(move)
                    new_game = copy.deepcopy(game)
                    location = move.to_matrix_location()
                    while new_game.board[location] != EMPTY:
                        # Knock out the current best entry with an arbitrarily low value so
                        # np.argmax falls through to the next-highest Q-value.
                        temp_qval[0][np.argmax(temp_qval)] = -100
                        move = np.argmax(temp_qval)
                        move = Move.from_flat_idx(move)
                        location = move.to_matrix_location()
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 50  # target group captured: terminal state
            else:
                reward = -1

            # experience replay storage
            if len(replay) < buffer:
                replay.append((board2input(game, 'b'), move, reward,
                               board2input(new_game, 'b')))
            else:
                # Overwrite the oldest slot in the (circular) replay buffer.
                h = (h + 1) % buffer
                replay[h] = (board2input(game, 'b'), move, reward,
                             board2input(new_game, 'b'))
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    (m_game, m_move, m_reward, m_new_game) = memory
                    oldqval = model.predict(m_game, batch_size=1)
                    # Max Q-value over the successor state stored in the replay tuple.
                    newqval = model.predict(m_new_game, batch_size=1)
                    maxq = np.max(newqval[0])
                    y = np.zeros(81)
                    y[:] = oldqval[0]
                    if m_reward == 50:  # terminal state: use the raw reward
                        update = m_reward
                    else:
                        update = m_reward + gamma * maxq
                    y[m_move] = update
                    X_train.append(m_game)
                    y_train.append(y)
                # vstack flattens the per-position inputs into a (batchSize, 243) training matrix
                X_train = np.vstack(X_train)
                y_train = np.stack(y_train)
                #print('ytrain: ', y_train[0])
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=0)
            game = copy.copy(new_game)
            if reward == 50:
                status = 0
        print('game ' + str(i) + ' ends here')
        #print(game)
        #temp_move = Move.from_flat_idx(move)
        #print(temp_move)
        #print(model.predict(board2input(game,'b'),batch_size=1))
        #input()
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
            #print ('epsilon : ' + str(epsilon))
        if i % 5000 == 0 and i > 0:
            name = 'src/learn/RL_Atari/hard_atari_' + str(i) + '.h5'
            model.save(name)

    model.save('src/learn/RL_Atari/test_model_final.h5')
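The init_game and check_dead_group helpers are not shown either. The reward scheme above (a positive reward as soon as the group seeded at (col_coord, row_coord) is dead) points to an Atari-Go style capture objective: init_game presumably places the target stone, and check_dead_group reports whether it has been captured. A rough, hypothetical sketch of the check, assuming game.board is a 9x9 numpy array indexed as (row, col) with an EMPTY marker for free points:

import numpy as np

def check_dead_group(game, col, row):
    # Hypothetical: True once the group seeded at (row, col) is captured, i.e. the
    # point is empty again or the group has no liberties left.
    board = np.asarray(game.board)
    if board[row, col] == EMPTY:
        return True
    color = board[row, col]
    stack, seen = [(row, col)], {(row, col)}
    while stack:  # flood-fill the group, looking for any empty neighbour (a liberty)
        r, c = stack.pop()
        for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nr, nc = r + dr, c + dc
            if 0 <= nr < board.shape[0] and 0 <= nc < board.shape[1]:
                if board[nr, nc] == EMPTY:
                    return False
                if board[nr, nc] == color and (nr, nc) not in seen:
                    seen.add((nr, nc))
                    stack.append((nr, nc))
    return True

The real implementation may differ; the sketch only illustrates the capture condition that drives the reward signal.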