Example #1
 def start_game(self):
     if len(self.results) < self.results_length:
         print("run nr", len(self.results))
         self.game_board = Game2048(
             board=[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
         self.board = self.game_board.board
         self.game_board.generate_new_node()
         self.move_count = 0
         #self.draw_board()
         self.time = time()
         self.run_algorithm()
     else:
         print(self.results)
         print("Largest tile", max(self.results))
         print("Average tile", sum(self.results) / float(len(self.results)))
         if self.action[0] == "p":
             self.results_from_nn_playing = copy.copy(self.results)
         elif self.action[0] == "r":
             self.results_from_random_playing = copy.copy(self.results)
         elif self.action[0] == "c":
             self.results_from_nn_playing = copy.copy(self.results)
             self.print_comparison()
         self.results = []
         self.user_control()
         self.start_game()
Example #2
    def update_randomsample(self):
        # sample batch_size transitions from memory
        sample_batch = random.sample(self.memory, self.batch_size)
        state_batch, action_batch, nextstate_batch, reward_batch, isover_batch = zip(
            *sample_batch)
        feed_batch = torch.from_numpy(
            self.transform_state(state_batch)).to("cuda")

        # compute the Q estimate (predicted Q-values)
        Q_table = self.network.forward(feed_batch)  #Qtable
        # take Q(s, a) for the sampled actions from Q_table, i.e. the Q estimate
        Q_predict = Q_table[np.arange(self.batch_size), action_batch]

        # compute the Q target
        with torch.no_grad():
            feed_next_batch = torch.from_numpy(
                self.transform_state(nextstate_batch)).to("cuda")
            Q_table_next = self.network.forward(feed_next_batch)
            Q_nextmax = torch.Tensor(self.batch_size).to("cuda")
            for i in range(self.batch_size):
                nextstate = nextstate_batch[i]
                legal_actions = Game2048.legal_moves(nextstate)
                # max over the legal actions of the i-th next state (0 if none are legal)
                Q_nextmax[i] = torch.max(
                    Q_table_next[i, legal_actions]) if legal_actions else 0
            # Q target formula: r + gamma * max_a' Q(s', a')
            Q_label = torch.Tensor(
                reward_batch).cuda() + self.reward_decay * Q_nextmax

        # compute the loss and update the network
        loss = self.loss_fn(Q_predict, Q_label)
        print("\n{} th train starts with {} loss".format(
            self.train_steps, loss))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
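The heart of this update is the one-step TD target r + gamma * max_a' Q(s', a'), taken only over the legal moves of each next state. Below is a minimal, self-contained sketch of just that masking step with made-up shapes and values (3 next states, 4 actions); it is illustrative only, not the project's code.

import torch

# Dummy data: Q-values of 3 next states over 4 actions, their rewards,
# and the legal moves per next state ([] means the game is over).
q_next = torch.randn(3, 4)
rewards = torch.tensor([4.0, 0.0, 8.0])
legal = [[0, 2], [1], []]
gamma = 0.9

q_nextmax = torch.zeros(3)
for i, moves in enumerate(legal):
    if moves:
        q_nextmax[i] = q_next[i, moves].max()  # max only over legal actions

q_target = rewards + gamma * q_nextmax  # regression label for the loss above
print(q_target)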
Example #3
 def addChild(self, move):
     child = Game2048(np.copy(self.state.board),
                      self.state.score,
                      implementation=self.implementation)
     result = child.action(move)
     if result == 2:
         child.ended = True
     self.children.append(GameState(child, move, self))
Example #4
 def env_start(self):
     self._2048game = Game2048()
     self.moves_dict = {
         0: self._2048game.slideLeft,
         1: self._2048game.slideRight,
         2: self._2048game.slideUp,
         3: self._2048game.slideDown
     }
     return self.get_state()
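env_start maps action indices 0..3 to the game's slide methods and returns the initial state via get_state. A hedged sketch of what a matching env_step could look like on top of that mapping (this method is hypothetical; reward and termination handling are omitted):

 def env_step(self, action):
     # Hypothetical companion to env_start: apply the slide selected by
     # `action` via moves_dict, then return the resulting state.
     self.moves_dict[action]()  # 0=slideLeft, 1=slideRight, 2=slideUp, 3=slideDown
     return self.get_state()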
Example #5
 def train(self, mode, modelpath=None):
     if modelpath:
         self.network.load_state_dict(torch.load(modelpath))
     while self.eposide < self.max_eposide:
         game = Game2048()
         state = game.matrix
         local_steps = 0
         # print(game.matrix)
         while True:
             # print("{}th before current_score {}".format(local_steps,game.score))
             # print(game.matrix,np.array_equal(game.matrix,state))
             actionList = self.chooseAction(state, e_greedy=True)
             action, reward = game.step(state, actionList)
             nextstate = game.matrix
             # print(state,game.score)
             self.memory.append(
                 (state, action, nextstate, reward, game.isover))  # store the transition
             # print("{}th after current_score {}".format(local_steps,game.score))
             # print(game.matrix,np.array_equal(game.matrix,nextstate))
             local_steps += 1
             self.total_steps += 1  # increment the local and global step counters
             if len(self.memory) >= self.memory_size:
                 self.train_steps += 1
                 print("{} th train's learning rate :{}".format(
                     self.train_steps, self.scheduler.get_lr()))
                 if mode == "random":
                     self.update_randomsample()
                 else:
                     for i in range(1):
                         self.update_all_batch()
                     self.memory = deque(maxlen=self.memory_size)
                 # print("the {}th training ends".format(self.train_steps))
                 if self.train_steps % self.save_per_iter == 0:
                     torch.save(
                         self.network.state_dict(),
                         './parameter{}.pkl'.format(
                             self.train_steps // self.save_per_iter % 20))
                     print("successfully saved")
             # if this game is over, leave the inner loop; otherwise take the next step
             if game.isover:
                 break
             else:
                 state = nextstate.copy()
         self.max_num = game.maxnum if game.maxnum > self.max_num else self.max_num
         self.max_score = game.score if game.score > self.max_score else self.max_score
         print(
             "\nEpisode {} finished with score: {}, max tile: {}, steps: {}, details:"
             .format(self.eposide, game.score, game.maxnum, local_steps))
         print(game.matrix, "epsilon:{}".format(self.epsilon))
         print("up to now max score:{} max number:{}".format(
             self.max_score, self.max_num))
         self.record.append((self.eposide, game.score, game.maxnum,
                             local_steps))  # per episode: number, score, max tile, steps
         # update the best parameters
         self.eposide += 1
     torch.save(self.network.state_dict(), './finally.pkl')
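train relies on chooseAction returning a preference-ordered list of actions that game.step then tries in order. The actual implementation is not shown in these examples; the following is only a sketch of an epsilon-greedy version with that assumed return convention (the epsilon attribute, network call, and ordering convention are assumptions):

import numpy as np
import torch

def choose_action_sketch(self, state, e_greedy=True):
    # Sketch of an epsilon-greedy policy returning all four actions ordered
    # from most to least preferred (assumed convention, not the project's code).
    if e_greedy and np.random.rand() < self.epsilon:
        return list(np.random.permutation(4))          # explore: random order
    with torch.no_grad():
        feed = torch.from_numpy(self.transform_state([state])).to("cuda")
        scores = self.network(feed).cpu().numpy()[0]   # exploit: Q-values
    return list(np.argsort(-scores))                   # highest Q first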
Example #6
def predict(state):
    state_torch = torch.from_numpy(transform_state(state)).to("cuda")
    result = NETWORK.forward(state_torch)
    actionScore = result.cpu().detach().numpy()[0]
    legals = Game2048.legal_moves(state)
    print(legals)
    for action in np.argsort(-actionScore):
        if action in legals:
            return action
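predict (and the updates above) turn a raw board into a network input via transform_state, which is not shown here. One plausible encoding is a stack of one-hot planes indexed by log2 of each tile value; the sketch below is purely illustrative and may differ from the project's actual encoding:

import numpy as np

def transform_state_sketch(board, channels=16):
    # Illustrative only: one one-hot plane per tile exponent (plane 0 for empty cells).
    board = np.asarray(board)
    planes = np.zeros((1, channels, board.shape[0], board.shape[1]), dtype=np.float32)
    for (r, c), v in np.ndenumerate(board):
        idx = 0 if v == 0 else int(np.log2(v))
        planes[0, idx, r, c] = 1.0
    return planes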
Example #7
 def start_game(self):
     print "avg", sum(self.results)/float(len(self.results) + 0.001)
     if len(self.results) < 30:
         self.game_board = Game2048(board=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]])
         self.board = self.game_board.board
         self.game_board.generate_new_node()
         self.depth = 4
         self.move_count = 0
         self.expectimax = Expectimax()
         self.draw_board()
         self.time = time()
         self.run_algorithm()
     else:
         print(self.results)
         print("avg", sum(self.results) / float(len(self.results)))
Example #8
 def __init__(self,
              state=Game2048(_dims=8),
              move=None,
              parent=None,
              implementation='cpu'):
     try:
         self.depth = parent.depth + 1
     except AttributeError:  # a root node has no parent
         self.depth = 0
     self.state = state
     self.move = move
     self.parent = parent
     self.children = []
     self.ended = False
     if implementation == 'gpu':
         self.implementation = 'gpu'
     else:
         self.implementation = 'cpu'
     return
Example #9
    def update_all_batch(self):
        all_loss = 0
        for i in range(self.memory_size, 0, -self.batch_size):
            memory = list(self.memory)
            state_batch, action_batch, nextstate_batch, reward_batch, isover_batch = zip(
                *memory[i - self.batch_size:i])
            feed_batch = torch.from_numpy(
                self.transform_state(state_batch)).to("cuda")

            # compute the Q estimate (predicted Q-values)
            Q_table = self.network.forward(feed_batch)  #Qtable
            # take Q(s, a) for the sampled actions from Q_table, i.e. the Q estimate
            Q_predict = Q_table[np.arange(self.batch_size), action_batch]

            # compute the Q target
            with torch.no_grad():
                feed_next_batch = torch.from_numpy(
                    self.transform_state(nextstate_batch)).to("cuda")
                Q_table_next = self.network.forward(feed_next_batch)
                Q_nextmax = torch.Tensor(self.batch_size).to("cuda")
                for i in range(self.batch_size):
                    nextstate = nextstate_batch[i]
                    legal_actions = Game2048.legal_moves(nextstate)
                    # max over the legal actions of the i-th next state (0 if none are legal)
                    Q_nextmax[i] = torch.max(
                        Q_table_next[i, legal_actions]) if legal_actions else 0
                # Q target formula: r + gamma * max_a' Q(s', a')
                Q_label = torch.Tensor(
                    reward_batch).cuda() + self.reward_decay * Q_nextmax

            # compute the loss and update the network
            self.scheduler.step()
            loss = self.loss_fn(Q_predict, Q_label)
            all_loss += loss.item()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        print("\n{} th train ends with {} loss".format(
            self.train_steps,
            all_loss / (self.memory_size // self.batch_size)))
Example #10
    def expand(self, amount):
        if self.children == []:
            # Decide which moves are useful
            possibleMoves = []
            for play in range(4):
                child = Game2048(np.copy(self.state.board),
                                 self.state.score,
                                 implementation=self.implementation)
                result = child.action(play)
                if result == 0:
                    # illegal move, skip it
                    continue
                possibleMoves.append(play)
                if result == 2:
                    child.ended = True
                self.children.append(GameState(child, play, self))

            # move to useful places 'amount' times
            move_idx = 0
            while possibleMoves and len(self.children) < amount:
                self.addChild(possibleMoves[move_idx])
                move_idx += 1
                if move_idx > len(possibleMoves) - 1:
                    move_idx = 0

            # develop all games that were created
            moves = int(sqrt(amount))
            for child in self.children:
                for i in range(moves):
                    result = 0
                    move_tries = 0
                    while result == 0:
                        result = child.state.action(rnd.randint(0, 3))
                        move_tries += 1
                        if move_tries > 8:
                            child.ended = True
                            break
        pass
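A short sketch of how such a node might be driven, using only the attributes visible in these examples (GameState, Game2048, children, move, state.score); the project's actual search loop may look different:

# Assumes GameState and Game2048 from this project are importable.
root = GameState(Game2048(_dims=8))          # root node, as in __init__ above
root.expand(16)                              # create and roll out child games
if root.children:
    best = max(root.children, key=lambda c: c.state.score)
    print("best first move:", best.move, "with score", best.state.score)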
Example #11
 def test(self, n_eposide):
     self.network.load_state_dict(torch.load('./parameter0.pkl'))
     for i in range(n_eposide):
         game = Game2048()
         state = game.matrix
         local_steps = 0
         while True:
             actionList = self.chooseAction(state, e_greedy=False)
             action, reward = game.step(state, actionList)
             # print(game.matrix)
             local_steps += 1
             nextstate = game.matrix
             if game.isover:
                 break
             else:
                 state = nextstate.copy()
         print("{}th eposide : gamescore={} maxnumber={} steps = {}".format(
             self.eposide, game.score, game.maxnum, local_steps))
         self.record.append((self.eposide, game.score, game.maxnum,
                             local_steps))  # per episode: number, score, max tile, steps
         print(game.matrix)
         self.eposide += 1
     print(sum(r[1] for r in self.record) / len(self.record))  # average score
     print(max(r[2] for r in self.record))  # best tile reached
Example #12
def Main():
    board = auxFuns.random_board(4)
    game = Game2048(board)
    return game
Example #13
        from ai import AI

        ai = AI(state_shape=__default_state_shape__,
                action_dim=__default_action_dim__,
                verbose=verbose)

        if verbose:
            print("Loading latest AI model from file: [{0}] ...".format(
                __filename__),
                  end="")
        ai.load_nnet(__filename__)
        if verbose:
            print("OK!")

        visualizer = VisualizeAI(state_shape=__default_state_shape__,
                                 ai=ai,
                                 verbose=verbose)

        visualizer.start()  # Run a game to get result
        visualizer.view()

    if args.play:
        print(
            "Play game. Please close the game in the terminal after closing the window (i.e., press Ctrl+C)."
        )
        from game2048 import Game2048, Human

        game2048 = Game2048(state_shape=__default_state_shape__,
                            player=Human(),
                            verbose=verbose)
        game2048.start()
Example #14
import numpy as np
import tensorflow as tf
from game2048 import Game2048
import os
from plotting import plot_episode_stats
from Estimator import Estimator, deep_q_learning

env = Game2048()

tf.reset_default_graph()

# Where we save our checkpoints and graphs
experiment_dir = os.path.abspath("./experiments/")

# Create a global step variable
global_step = tf.Variable(0, name='global_step', trainable=False)

# Create estimators
q_estimator = Estimator(scope="q_estimator", summaries_dir=experiment_dir)
target_estimator = Estimator(scope="target_q")

# Run the experiment
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t, stats in deep_q_learning(sess,
                                    env,
                                    q_estimator=q_estimator,
                                    target_estimator=target_estimator,
                                    experiment_dir=experiment_dir,
                                    num_episodes=50000,
                                    replay_memory_size=50000,
Example #15
                                         else color_map[value][1])
        else:
            grid, new_tiles, score = game.get_grid(), game.get_new_tiles(
            ), int(game.get_score())
            max_tile = int(grid[~isnan(grid)].max())
            for i in range(16):
                labels[i].config(text='' if i < 4 or i > 11 else 'GAMEOVER'[i - 4],
                                 bg=color_map['base'])
            tk_root.title('Game Over! Tile achieved: {}, Score: {}'.format(
                max_tile, score))


if __name__ == '__main__':
    game, root, window_size = Game2048(), Tk(), 360
    root.title('Move tiles to get 2048! Score: 0')
    root.geometry('{0}x{0}+111+111'.format(window_size))
    root.config(background='#bbada0')

    grid, labels = game.get_grid(), []
    for (i, j), value in ndenumerate(grid):
        frame = Frame(root,
                      width=window_size / 4 - 2,
                      height=window_size / 4 - 2)
        font = Font()
        frame.pack_propagate(0)
        frame.place(x=j * window_size / 4 + 1, y=i * window_size / 4 + 1)
        (text, color) = ('',
                         color_map['base']) if isnan(value) else ('{}'.format(
                             int(value)), color_map[value][0])
Example #16
from pynput import keyboard
from game2048 import Game2048

game = Game2048(4)
game.start_game()


def on_press(key):
    try:
        if key.char == 'w' or key.char == 'W':
            game.up()
        elif key.char == 's' or key.char == 'S':
            game.down()
        elif key.char == 'a' or key.char == 'A':
            game.left()
        elif key.char == 'd' or key.char == 'D':
            game.right()
        elif key.char == 'r' or key.char == 'R':
            game.undo()
        elif key.char == 'p' or key.char == 'P':
            game.new_game()
        elif key.char == 'c' or key.char == 'C':
            game.check()
    except AttributeError:
        print('special key {0} pressed'.format(key))


def on_release(key):
    if key == keyboard.Key.esc:
        # Stop listener
        return False
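The example defines the handlers but stops before starting a listener; with pynput that is typically done as follows (a minimal continuation reusing the on_press and on_release functions above):

# Block until on_release returns False (i.e. Esc is pressed).
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
    listener.join()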
Example #17
def main():
    # initialize pygame
    pygame.init()
    screen = pygame.display.set_mode(SCREENSIZE)
    pygame.display.set_caption('2048')
    # play background music
    pygame.mixer.music.load(BGMPATH)
    pygame.mixer.music.play(-1)
    # instantiate the 2048 game
    game = Game2048()
    # main game loop
    clock = pygame.time.Clock()
    is_running = True
    while is_running:
        screen.fill(pygame.Color(BG_COLOR))
        AI_rect = functionButton(screen)
        # -- handle input events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == pygame.KEYDOWN:
                if event.key in [
                        pygame.K_UP, pygame.K_DOWN, pygame.K_LEFT,
                        pygame.K_RIGHT
                ]:
                    ismove, movescore, state = game.move(
                        game.matrix, {
                            pygame.K_UP: 'w',
                            pygame.K_DOWN: 's',
                            pygame.K_LEFT: 'a',
                            pygame.K_RIGHT: 'd'
                        }[event.key])
                    game.score += movescore
                    if ismove:
                        game.generate()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if is_rect(event.pos, AI_rect):
                    action = predict(game.matrix.copy())
                    ismove, movescore, nextstate = game.move(
                        game.matrix, action)
                    game.matrix = nextstate
                    game.score += movescore
                    if ismove:
                        game.generate()
        with open("score", 'r', encoding='utf-8') as f:
            max_score = f.read()
        # -- update the game state
        if game.isover:
            # game_2048.saveMaxScore()
            is_running = False
            if game.score > int(max_score):
                with open("score", 'w', encoding='utf-8') as f:
                    f.write(str(int(game.score)))
        # -- draw the game elements to the screen
        drawGameMatrix(screen, game.matrix)
        drawScore(screen, game.score, max_score)
        # drawGameIntro(screen, start_x, start_y, cfg)
        # -- refresh the display
        pygame.display.update()
        clock.tick(FPS)
    return endInterface(screen)
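main reads the "score" file on every frame and assumes it exists and contains an integer. A more defensive read could look like the hypothetical helper below (not part of the original code):

def read_high_score(path="score"):
    # Hypothetical helper: return 0 if the score file is missing or invalid.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return int(f.read().strip() or 0)
    except (OSError, ValueError):
        return 0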