def start_game(self):
    if len(self.results) < self.results_length:
        print("run nr", len(self.results))
        self.game_board = Game2048(
            board=[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
        self.board = self.game_board.board
        self.game_board.generate_new_node()
        self.move_count = 0
        # self.draw_board()
        self.time = time()
        self.run_algorithm()
    else:
        print(self.results)
        print("Largest tile", max(self.results))
        print("Average tile", sum(self.results) / float(len(self.results)))
        if self.action[0] == "p":
            self.results_from_nn_playing = copy.copy(self.results)
        elif self.action[0] == "r":
            self.results_from_random_playing = copy.copy(self.results)
        elif self.action[0] == "c":
            self.results_from_nn_playing = copy.copy(self.results)
            self.print_comparison()
        self.results = []
        self.user_control()
        self.start_game()
def update_randomsample(self):
    # Sample batch_size transitions from memory
    sample_batch = random.sample(self.memory, self.batch_size)
    state_batch, action_batch, nextstate_batch, reward_batch, isover_batch = zip(
        *sample_batch)
    feed_batch = torch.from_numpy(
        self.transform_state(state_batch)).to("cuda")
    # Q estimate: forward pass gives the Q-table, then pick Q(s, a)
    Q_table = self.network.forward(feed_batch)
    Q_predict = Q_table[np.arange(self.batch_size), action_batch]
    # Q target
    with torch.no_grad():
        feed_next_batch = torch.from_numpy(
            self.transform_state(nextstate_batch)).to("cuda")
        Q_table_next = self.network.forward(feed_next_batch)
        Q_nextmax = torch.Tensor(self.batch_size).to("cuda")
        for i in range(self.batch_size):
            nextstate = nextstate_batch[i]
            legal_actions = Game2048.legal_moves(nextstate)
            # Take row i of the batched output, restricted to legal actions
            Q_nextmax[i] = torch.max(
                Q_table_next[i][legal_actions]) if legal_actions else 0
        # Bellman target: r + gamma * max_a' Q(s', a')
        Q_label = torch.Tensor(
            reward_batch).cuda() + self.reward_decay * Q_nextmax
    # Compute the loss and update the network
    loss = self.loss_fn(Q_predict, Q_label)
    print("\n{} th train starts with {} loss".format(self.train_steps, loss))
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
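# Illustrative sketch (not part of the snippet above): the target used there is
# Q_label = r + reward_decay * max over legal a' of Q(s', a'). The shapes and
# values below (3 samples, 4 actions, toy rewards) are hypothetical and only
# demonstrate the per-sample legal-action masking on dummy tensors.
import torch


def bellman_target_demo():
    batch_size, n_actions, reward_decay = 3, 4, 0.9
    q_next = torch.rand(batch_size, n_actions)   # Q(s', .) for each sample
    rewards = torch.tensor([1.0, 0.0, 2.0])      # immediate rewards
    legal = [[0, 2], [1], []]                    # legal moves per next state
    q_nextmax = torch.zeros(batch_size)
    for i, moves in enumerate(legal):
        # Max over legal actions only; 0 when no move is possible (terminal)
        q_nextmax[i] = q_next[i][moves].max() if moves else 0.0
    return rewards + reward_decay * q_nextmax


print(bellman_target_demo())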
def addChild(self, move):
    child = Game2048(np.copy(self.state.board),
                     self.state.score,
                     implementation=self.implementation)
    result = child.action(move)
    if result == 2:
        child.ended = True
    self.children.append(GameState(child, move, self))
def env_start(self):
    self._2048game = Game2048()
    self.moves_dict = {
        0: self._2048game.slideLeft,
        1: self._2048game.slideRight,
        2: self._2048game.slideUp,
        3: self._2048game.slideDown
    }
    return self.get_state()
def train(self, mode, modelpath=None):
    if modelpath:
        self.network.load_state_dict(torch.load(modelpath))
    while self.eposide < self.max_eposide:
        game = Game2048()
        state = game.matrix
        local_steps = 0
        # print(game.matrix)
        while True:
            # print("{}th before current_score {}".format(local_steps, game.score))
            # print(game.matrix, np.array_equal(game.matrix, state))
            actionList = self.chooseAction(state, e_greedy=True)
            action, reward = game.step(state, actionList)
            nextstate = game.matrix
            # print(state, game.score)
            # Store the transition
            self.memory.append(
                (state, action, nextstate, reward, game.isover))
            # print("{}th after current_score {}".format(local_steps, game.score))
            # print(game.matrix, np.array_equal(game.matrix, nextstate))
            # Increment the local and global step counters
            local_steps += 1
            self.total_steps += 1
            if len(self.memory) >= self.memory_size:
                self.train_steps += 1
                print("{} th train's learning rate :{}".format(
                    self.train_steps, self.scheduler.get_lr()))
                if mode == "random":
                    self.update_randomsample()
                else:
                    for i in range(1):
                        self.update_all_batch()
                self.memory = deque(maxlen=self.memory_size)
                # print("the {}th training ends".format(self.train_steps))
                if self.train_steps % self.save_per_iter == 0:
                    torch.save(
                        self.network.state_dict(),
                        './parameter{}.pkl'.format(
                            self.train_steps // self.save_per_iter % 20))
                    print("successfully saved")
            # Leave the loop when this game is over, otherwise take the next step
            if game.isover:
                break
            else:
                state = nextstate.copy()
        self.max_num = game.maxnum if game.maxnum > self.max_num else self.max_num
        self.max_score = game.score if game.score > self.max_score else self.max_score
        print(
            "\nEpisode {} finished with score:{} maxnumber:{} steps={} ,details:"
            .format(self.eposide, game.score, game.maxnum, local_steps))
        print(game.matrix, "epsilon:{}".format(self.epsilon))
        print("up to now max score:{} max number:{}".format(
            self.max_score, self.max_num))
        # Record the episode number, score, max tile and step count per game
        self.record.append(
            (self.eposide, game.score, game.maxnum, local_steps))
        # Update the best parameters
        self.eposide += 1
    torch.save(self.network.state_dict(), './finally.pkl')
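# Hedged sketch only: chooseAction is not shown in these snippets, so the helper
# below is a hypothetical epsilon-greedy ranking consistent with how train()
# uses it (it returns actions ordered by preference). The names
# epsilon_greedy_actions and n_actions are assumptions, not the repo's API.
import numpy as np


def epsilon_greedy_actions(q_values, epsilon, n_actions=4):
    # With probability epsilon explore: return the actions in random order;
    # otherwise exploit: return the actions sorted by descending Q-value.
    if np.random.rand() < epsilon:
        return list(np.random.permutation(n_actions))
    return list(np.argsort(-np.asarray(q_values)))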
def predict(state):
    state_torch = torch.from_numpy(transform_state(state)).to("cuda")
    result = NETWORK.forward(state_torch)
    actionScore = result.cpu().detach().numpy()[0]
    legals = Game2048.legal_moves(state)
    print(legals)
    # Walk the actions from highest to lowest predicted score and return
    # the first one that is legal in the current state
    for action in np.argsort(-actionScore):
        if action in legals:
            return action
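# Tiny usage illustration of the masking done in predict(): np.argsort(-scores)
# ranks actions from best to worst, and the first index that is also legal wins.
# The numbers below are made up.
import numpy as np

scores = np.array([0.1, 0.9, 0.4, 0.7])  # network scores for actions 0..3
legals = [0, 2]                          # suppose only actions 0 and 2 are legal
best_legal = next(a for a in np.argsort(-scores) if a in legals)
print(best_legal)                        # -> 2 (0.4 beats 0.1 among legal moves)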
def start_game(self):
    print("avg", sum(self.results) / float(len(self.results) + 0.001))
    if len(self.results) < 30:
        self.game_board = Game2048(
            board=[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
        self.board = self.game_board.board
        self.game_board.generate_new_node()
        self.depth = 4
        self.move_count = 0
        self.expectimax = Expectimax()
        self.draw_board()
        self.time = time()
        self.run_algorithm()
    else:
        print(self.results)
        print("avg", sum(self.results) / float(len(self.results)))
def __init__(self, state=None, move=None, parent=None, implementation='cpu'):
    # Avoid a shared mutable default argument: build a fresh game when none is given
    if state is None:
        state = Game2048(_dims=8)
    try:
        self.depth = parent.depth + 1
    except AttributeError:
        self.depth = 0
    self.state = state
    self.move = move
    self.parent = parent
    self.children = []
    self.ended = False
    if implementation == 'gpu':
        self.implementation = 'gpu'
    else:
        self.implementation = 'cpu'
    return
def update_all_batch(self):
    all_loss = 0
    memory = list(self.memory)
    for i in range(self.memory_size, 0, -self.batch_size):
        state_batch, action_batch, nextstate_batch, reward_batch, isover_batch = zip(
            *memory[i - self.batch_size:i])
        feed_batch = torch.from_numpy(
            self.transform_state(state_batch)).to("cuda")
        # Q estimate: forward pass gives the Q-table, then pick Q(s, a)
        Q_table = self.network.forward(feed_batch)
        Q_predict = Q_table[np.arange(self.batch_size), action_batch]
        # Q target
        with torch.no_grad():
            feed_next_batch = torch.from_numpy(
                self.transform_state(nextstate_batch)).to("cuda")
            Q_table_next = self.network.forward(feed_next_batch)
            Q_nextmax = torch.Tensor(self.batch_size).to("cuda")
            # Use a separate index so the outer loop variable is not shadowed
            for j in range(self.batch_size):
                nextstate = nextstate_batch[j]
                legal_actions = Game2048.legal_moves(nextstate)
                Q_nextmax[j] = torch.max(
                    Q_table_next[j][legal_actions]) if legal_actions else 0
            # Bellman target: r + gamma * max_a' Q(s', a')
            Q_label = torch.Tensor(
                reward_batch).cuda() + self.reward_decay * Q_nextmax
        # Compute the loss and update the network
        self.scheduler.step()
        loss = self.loss_fn(Q_predict, Q_label)
        all_loss += loss.item()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
    print("\n{} th train ends with {} loss".format(
        self.train_steps, all_loss / (self.memory_size // self.batch_size)))
def expand(self, amount):
    if self.children == []:
        # Decide which moves are useful
        possibleMoves = [0, 1, 2, 3]
        # Iterate over a copy so removing a bad move does not skip the next one
        for play in list(possibleMoves):
            child = Game2048(np.copy(self.state.board),
                             self.state.score,
                             implementation=self.implementation)
            result = child.action(play)
            if result == 0:
                # print("Bad action: " + str(play))
                possibleMoves.remove(play)
            elif result == 2:
                child.ended = True
                self.children.append(GameState(child, play, self))
            else:
                self.children.append(GameState(child, play, self))
        # move to useful places 'amount' times
        move_idx = 0
        while len(self.children) < amount:
            self.addChild(possibleMoves[move_idx])
            move_idx += 1
            if move_idx > len(possibleMoves) - 1:
                move_idx = 0
        # develop all games that were created
        moves = int(sqrt(amount))
        for child in self.children:
            for i in range(moves):
                result = 0
                move_tries = 0
                while result == 0:
                    result = child.state.action(rnd.randint(0, 3))
                    move_tries += 1
                    if move_tries > 8:
                        child.ended = True
                        break
def test(self, n_eposide):
    self.network.load_state_dict(torch.load('./parameter0.pkl'))
    for i in range(n_eposide):
        game = Game2048()
        state = game.matrix
        local_steps = 0
        while True:
            actionList = self.chooseAction(state, e_greedy=False)
            action, reward = game.step(state, actionList)
            # print(game.matrix)
            local_steps += 1
            nextstate = game.matrix
            if game.isover:
                break
            else:
                state = nextstate.copy()
        print("{}th episode : gamescore={} maxnumber={} steps = {}".format(
            self.eposide, game.score, game.maxnum, local_steps))
        # Record the episode number, score, max tile and step count per game
        self.record.append(
            (self.eposide, game.score, game.maxnum, local_steps))
        print(game.matrix)
        self.eposide += 1
    print(sum(r[1] for r in self.record) / len(self.record))
    print(max(r[2] for r in self.record))
def Main():
    board = auxFuns.random_board(4)
    game = Game2048(board)
    return game
from ai import AI

ai = AI(state_shape=__default_state_shape__,
        action_dim=__default_action_dim__,
        verbose=verbose)
if verbose:
    print("Loading latest AI model from file: [{0}] ...".format(__filename__),
          end="")
ai.load_nnet(__filename__)
if verbose:
    print("OK!")
visualizer = VisualizeAI(state_shape=__default_state_shape__,
                         ai=ai,
                         verbose=verbose)
visualizer.start()
# Run a game to get result
visualizer.view()

if args.play:
    print(
        "Play game. Please close the game in the terminal after closing the window (i.e., press Ctrl+C)."
    )
    from game2048 import Game2048, Human
    game2048 = Game2048(state_shape=__default_state_shape__,
                        player=Human(),
                        verbose=verbose)
    game2048.start()
import numpy as np
import tensorflow as tf
from game2048 import Game2048
import os
from plotting import plot_episode_stats
from Estimator import Estimator, deep_q_learning

env = Game2048()

tf.reset_default_graph()

# Where we save our checkpoints and graphs
experiment_dir = os.path.abspath("./experiments/")

# Create a global step variable
global_step = tf.Variable(0, name='global_step', trainable=False)

# Create estimators
q_estimator = Estimator(scope="q_estimator", summaries_dir=experiment_dir)
target_estimator = Estimator(scope="target_q")

# Run the experiment
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t, stats in deep_q_learning(sess,
                                    env,
                                    q_estimator=q_estimator,
                                    target_estimator=target_estimator,
                                    experiment_dir=experiment_dir,
                                    num_episodes=50000,
                                    replay_memory_size=50000,
                  else color_map[value][1])
    else:
        grid, new_tiles, score = game.get_grid(), game.get_new_tiles(
        ), int(game.get_score())
        max_tile = int(grid[~isnan(grid)].max())
        [
            labels[i].config(text='' if i < 4 or i > 11 else 'GAMEOVER'[i - 4],
                             bg=color_map['base']) for i in range(16)
        ]
        tk_root.title('Game Over! Tile achieved: {}, Score: {}'.format(
            max_tile, score))


if __name__ == '__main__':
    game, root, window_size = Game2048(), Tk(), 360
    root.title('Move tiles to get 2048! Score: 0')
    root.geometry('{0}x{0}+111+111'.format(window_size))
    root.config(background='#bbada0')
    grid, labels = game.get_grid(), []
    for (i, j), value in ndenumerate(grid):
        frame = Frame(root,
                      width=window_size / 4 - 2,
                      height=window_size / 4 - 2)
        font = Font()
        frame.pack_propagate(0)
        frame.place(x=j * window_size / 4 + 1, y=i * window_size / 4 + 1)
        (text, color) = ('', color_map['base']) if isnan(value) else (
            '{}'.format(int(value)), color_map[value][0])
from pynput import keyboard
from game2048 import Game2048

game = Game2048(4)
game.start_game()


def on_press(key):
    try:
        if key.char == 'w' or key.char == 'W':
            game.up()
        elif key.char == 's' or key.char == 'S':
            game.down()
        elif key.char == 'a' or key.char == 'A':
            game.left()
        elif key.char == 'd' or key.char == 'D':
            game.right()
        elif key.char == 'r' or key.char == 'R':
            game.undo()
        elif key.char == 'p' or key.char == 'P':
            game.new_game()
        elif key.char == 'c' or key.char == 'C':
            game.check()
    except AttributeError:
        print('special key {0} pressed'.format(key))


def on_release(key):
    if key == keyboard.Key.esc:
        # Stop listener
        return False
def main():
    # Initialise the game
    pygame.init()
    screen = pygame.display.set_mode(SCREENSIZE)
    pygame.display.set_caption('2048')
    # Play background music
    pygame.mixer.music.load(BGMPATH)
    pygame.mixer.music.play(-1)
    # Instantiate the 2048 game
    game = Game2048()
    # Main game loop
    clock = pygame.time.Clock()
    is_running = True
    while is_running:
        screen.fill(pygame.Color(BG_COLOR))
        AI_rect = functionButton(screen)
        # -- key and mouse handling
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == pygame.KEYDOWN:
                if event.key in [
                        pygame.K_UP, pygame.K_DOWN, pygame.K_LEFT,
                        pygame.K_RIGHT
                ]:
                    ismove, movescore, state = game.move(
                        game.matrix, {
                            pygame.K_UP: 'w',
                            pygame.K_DOWN: 's',
                            pygame.K_LEFT: 'a',
                            pygame.K_RIGHT: 'd'
                        }[event.key])
                    game.score += movescore
                    if ismove:
                        game.generate()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if is_rect(event.pos, AI_rect):
                    action = predict(game.matrix.copy())
                    ismove, movescore, nextstate = game.move(
                        game.matrix, action)
                    game.matrix = nextstate
                    game.score += movescore
                    if ismove:
                        game.generate()
        with open("score", 'r', encoding='utf-8') as f:
            max_score = f.read()
        # -- update the game state
        if game.isover:
            # game_2048.saveMaxScore()
            is_running = False
            if game.score > int(max_score):
                with open("score", 'w', encoding='utf-8') as f:
                    f.write(str(int(game.score)))
        # -- draw the necessary game elements to the screen
        drawGameMatrix(screen, game.matrix)
        drawScore(screen, game.score, max_score)
        # drawGameIntro(screen, start_x, start_y, cfg)
        # -- refresh the screen
        pygame.display.update()
        clock.tick(FPS)
    return endInterface(screen)