import copy
import random

import numpy as np
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import RMSprop

# Game, Move, BLACK, EMPTY, init_game, board2input and check_dead_group are
# the project's own Go helpers, defined elsewhere in the repository.


def main():
    # Q-network: 243 input features -> 81 outputs, one Q-value per board point.
    model = Sequential()
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu', input_shape=(243,)))
    model.add(Dense(units=400, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=81, kernel_initializer='uniform', activation='linear'))
    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    # Fixed target group for now; use random.randint(0, 8) for random targets.
    col_coord, row_coord = 1, 6
    epochs = 10
    gamma = 0.9
    epsilon = 1
    for i in range(epochs):
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1  # game in progress
        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if random.random() < epsilon:
                # Explore: play a random non-pass move.
                valid_moves = game.get_playable_locations('b')
                move = random.choice(valid_moves)
                while move.is_pass:
                    move = random.choice(valid_moves)
                new_game = copy.deepcopy(game)
                new_game.play(move, 'b')
                move = move.to_flat_idx()
            else:
                # Exploit: take the highest-valued move that lands on an
                # empty point.
                temp_qval = copy.copy(qval)
                move = np.argmax(temp_qval)
                move = Move.from_flat_idx(move)
                new_game = copy.deepcopy(game)
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # Mask the occupied point with an arbitrarily low value so
                    # argmax falls through to the next-best move.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = np.argmax(temp_qval)
                    move = Move.from_flat_idx(move)
                    location = move.to_matrix_location()
                new_game.play(move, 'b')
                move = move.to_flat_idx()
            if check_dead_group(new_game, col_coord, row_coord):
                reward = 10  # target group captured: terminal state
                status = 0
            else:
                reward = -1
            # maxQ must come from the successor state, not the current one.
            newQ = model.predict(board2input(new_game, 'b'), batch_size=1)
            maxQ = np.max(newQ)
            # Q-learning target: reward alone for a terminal state,
            # reward + gamma * maxQ otherwise.
            if reward == -1:  # non-terminal state
                update = reward + (gamma * maxQ)
            else:  # terminal state
                update = reward
            # y equals qval except at the chosen action, which gets the target.
            y = np.zeros((1, 81))
            y[:] = qval[:]
            y[0][move] = update
            model.fit(board2input(game, 'b'), y,
                      batch_size=1, epochs=1, verbose=0)
            game = copy.copy(new_game)
        print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
    model.save('test_model_1.h5')
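# The 243-unit input layer implies board2input encodes the 9x9 board as three
# binary planes over its 81 points: own stones, opponent stones, and empty
# points. board2input itself is defined elsewhere in the project; the function
# below is a hypothetical sketch under that assumption, not the project's
# actual implementation. It assumes game.board is a 9x9 numpy array and that a
# WHITE constant exists alongside BLACK and EMPTY.
def board2input_sketch(game, color):
    own = BLACK if color == 'b' else WHITE
    opp = WHITE if color == 'b' else BLACK
    planes = np.stack([
        (game.board == own).flatten(),    # plane 1: own stones
        (game.board == opp).flatten(),    # plane 2: opponent stones
        (game.board == EMPTY).flatten(),  # plane 3: empty points
    ]).astype(np.float32)
    # Batch of one, matching the network's input_shape=(243,).
    return planes.reshape(1, 243)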
def main():
    # Same Q-network as above.
    model = Sequential()
    model.add(Dense(units=200, kernel_initializer='uniform',
                    activation='relu', input_shape=(243,)))
    model.add(Dense(units=400, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=81, kernel_initializer='uniform', activation='linear'))
    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    epochs = 50000
    gamma = 0.975
    epsilon = 1
    batchSize = 50
    buffer = 100
    replay = []  # experience replay memory: (state, action, reward, new state)
    h = 0
    for i in range(epochs):
        # Random target group each game.
        col_coord, row_coord = random.randint(0, 8), random.randint(0, 8)
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1  # game in progress
        reward = -1  # by default at game start
        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if reward == -1:
                if random.random() < epsilon:
                    # Explore: play a random non-pass move.
                    valid_moves = game.get_playable_locations(BLACK)
                    if len(valid_moves) == 0:
                        print('end it')
                    move = random.choice(valid_moves)
                    while move.is_pass:
                        move = random.choice(valid_moves)
                    new_game = copy.deepcopy(game)
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()
                else:
                    # Exploit: highest-valued move on an empty point.
                    temp_qval = copy.copy(qval)
                    move = np.argmax(temp_qval)
                    move = Move.from_flat_idx(move)
                    new_game = copy.deepcopy(game)
                    location = move.to_matrix_location()
                    while new_game.board[location] != EMPTY:
                        # Mask the occupied point with an arbitrarily low value
                        # so argmax falls through to the next-best move.
                        temp_qval[0][np.argmax(temp_qval)] = -100
                        move = np.argmax(temp_qval)
                        move = Move.from_flat_idx(move)
                        location = move.to_matrix_location()
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()
            if check_dead_group(new_game, col_coord, row_coord):
                reward = 50  # target group captured: terminal state
            else:
                reward = -1
            # Experience replay storage.
            if len(replay) < buffer:
                replay.append((board2input(game, 'b'), move, reward,
                               board2input(new_game, 'b')))
            else:
                # Buffer full: overwrite the oldest memory ring-buffer style,
                # then train on a random minibatch.
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (board2input(game, 'b'), move, reward,
                             board2input(new_game, 'b'))
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    (m_game, m_move, m_reward, m_new_game) = memory
                    oldqval = model.predict(m_game, batch_size=1)
                    # maxq comes from the successor state.
                    newqval = model.predict(m_new_game, batch_size=1)
                    maxq = np.max(newqval)
                    y = np.zeros(81)
                    y[:] = oldqval[0]
                    if m_reward == 50:  # terminal state
                        update = m_reward
                    else:
                        update = m_reward + gamma * maxq
                    y[m_move] = update
                    X_train.append(m_game[0])  # drop the batch dimension
                    y_train.append(y)
                X_train = np.stack(X_train)
                y_train = np.stack(y_train)
                model.fit(X_train, y_train, batch_size=batchSize,
                          epochs=1, verbose=0)
            game = copy.copy(new_game)
            if reward == 50:
                status = 0
                print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)
        # Periodic checkpoints every 5000 games.
        if i % 5000 == 0 and i > 0:
            name = 'src/learn/RL_Atari/hard_atari_' + str(i) + '.h5'
            model.save(name)
    model.save('src/learn/RL_Atari/test_model_final.h5')
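# Hypothetical usage sketch: greedy play with the trained network. It reuses
# the project's Game, init_game, board2input and Move helpers and mirrors the
# occupied-point masking from the exploitation branch above; the target
# coordinates are illustrative, and keras.models.load_model restores the
# checkpoint saved by the training loop.
def play_greedy_move():
    from keras.models import load_model
    model = load_model('src/learn/RL_Atari/test_model_final.h5')
    game = Game()
    game = init_game(game, 4, 4)
    qval = model.predict(board2input(game, 'b'), batch_size=1)
    move = Move.from_flat_idx(np.argmax(qval))
    while game.board[move.to_matrix_location()] != EMPTY:
        qval[0][np.argmax(qval)] = -100  # mask occupied points
        move = Move.from_flat_idx(np.argmax(qval))
    game.play(move, 'b')
    return game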