def create_data(agent, testsize=40000):
    create_time = datetime.now().strftime('%m%d-%H%M%S')
    boards = np.empty((testsize, 256))
    moves = np.empty((testsize, 4))
    change = 0
    game = Game2048()
    for i in range(testsize):
        try:
            if change == 2:
                print(THREADSTR, "%i\t%i" % (i, game.score))
                game = Game2048()
            direction = agent.make_a_move(game)
            boards[i] = utl.create_one_hot_repr(game.board)
            move = np.zeros(4)
            move[direction] = 1
            moves[i] = move
            change = game.play(direction)
        except KeyboardInterrupt:
            boards = boards[:i]
            moves = moves[:i]
            i -= 1
            break
    print(THREADSTR, "Data points generated:", i + 1)
    path = realpath('../pretraindata/%s_t%i_pretrain_%s' % (create_time, THREAD, agent.displayname))
    np.savez(path, boards, moves)
    return path
def playThroughStdin(game: Game2048):
    # 0 - left, 1 - up, 2 - right, 3 - down
    directions = {'W': 1, 'S': 3, 'A': 0, 'D': 2}
    game.reset()
    moveCount = 0
    try:
        while True:
            print(game.boardAsString())
            inStr = input('Enter direction of swipe: ')
            if inStr.upper() not in directions:
                print('Unknown key, use W/A/S/D...')
                continue
            dir = directions[inStr.upper()]
            score, ended, valid = game.swipe(dir)
            if not valid:
                print('Invalid move, try again...')
                continue
            moveCount += 1
            if ended:
                print(f'Game Over. Score: {2**game.score} after {moveCount} turns.')
                return
            else:
                print(f'Swipe #{moveCount} gave {score} points.')
    except KeyboardInterrupt:
        print('Game aborted.')
def __init__(self, size_board, seed=None):
    self.__size_board = size_board
    self.__game = Game2048(size_board)
    self.__zeros = 2.7
    self.__smooth = 1.1
    self.__var = -1.1
    self.__weight = 0.3
    # Number of possible movements
    self.action_space = spaces.Discrete(4)
    # Number of observations
    self.observation_space = spaces.Box(0, 2**16, (size_board * size_board, ), dtype=np.int64)
    # Reward range
    self.reward_range = (0., np.inf)
    # Initialise seed
    self.np_random, seed = seeding.np_random(seed)
    # Legends
    self.__actions_legends = {0: "UP", 1: "DOWN", 2: "RIGHT", 3: "LEFT"}
    # Old max
    self.__old_max = 0
    # Debug
    self.__last_action = None
    self.__last_scores_move = None
    self.valid_movements = []
def playbatch(self, ngames=50):
    scores = np.empty(ngames)
    running_reward = 10
    for g in range(ngames):
        # Start a new game
        game = Game2048()
        c = _Counter()
        oldscore = 0
        while True:
            c.count()
            # Take a turn
            action = self.select_action(game)
            change = game.play(action)
            while change == 0:
                action = self.select_action(game, False)
                change = game.play(action)
            self.policy.episode_rewards.append(game.score - oldscore)
            # When the game is over
            if change == 2:
                break
            running_reward = running_reward * self.gamma + c.c * .01
            oldscore = game.score
        scores[g] = game.score
        self.finish_episode()
    return np.mean(scores)
def run():
    game = Game2048()
    while True:
        prettyBoard = (2**game.board)
        prettyBoard[game.board == 0] = 0
        print(prettyBoard)
        try:
            direction = int(input(
                "Enter the direction to swipe the board:\n\t(0 for left, 1 for up, 2 for right, 3 for down)\n"))
            if game.play(direction) == 2:
                break
            print("\n\tScore: {0}\n".format(game.score))
        except Exception:
            print("\tERROR: Invalid input")
    print("Sorry, you lost. Your final score was {0}".format(game.score))
def run():
    import sys
    game = Game2048(deterministic=[True, True])
    while True:
        prettyBoard = (2**game.board)
        prettyBoard[game.board == 0] = 0
        print(prettyBoard)
        print(game.score)
        try:
            direction = int(input(
                "Enter the direction to swipe the board:\n\t(0 for left, 1 for up, 2 for right, 3 for down, -1 to quit)\n"))
            if direction == -1:
                # Entering -1 exits the program
                sys.exit()
            if game.play(direction) == 2:
                break
            print("\n\tScore: {0}\n".format(game.score))
        except Exception:
            print("\tERROR: Invalid input")
    print("Sorry, you lost. Your final score was {0}".format(game.score))
def playBatch(W1, W2, learningRate, batchSize=100, lastmean=0):
    score, maxTiles = np.empty(batchSize), np.empty(batchSize)
    newW1, newW2 = torch.clone(W1).detach(), torch.clone(W2).detach()
    newW1.requires_grad = False
    newW2.requires_grad = False
    scoremean = 0
    for i in range(batchSize):
        game = Game2048()
        directions = list()
        prob = list()
        while True:
            logp = policyNetwork(torch.Tensor(game.board).view(16), W1, W2)
            with torch.no_grad():
                direction = weightedChoice(logp)
            change = game.play(direction)
            while True:
                if change != 0:
                    break
                else:
                    with torch.no_grad():
                        direction = weightedChoice(logp)
                    change = game.play(direction)
            if change == 2:
                break
            prob.append(logp)
            directions.append(direction)
        maxValue = np.max(game.board)
        reward = int(rewardFunction(game.board, game.score, scoremean))
        for j in range(len(directions)):
            gradients = torch.Tensor([0, 0, 0, 0])
            gradients[directions[j]] = reward
            prob[j].backward(gradients)
            with torch.no_grad():
                newW1 += reward * learningRate * W1.grad
                newW2 += reward * learningRate * W2.grad
                # print(W1.mean(), W2.mean())
                # print(W1[W1>0].size(), W2[W2>0].size())
                # print(W1)
            W1.grad.zero_()
            W2.grad.zero_()
        score[i] = game.score
        maxTiles[i] = maxValue
        # scoremean =
    return score, maxTiles, newW1, newW2
def _makeTurn(self, net: AgentNet, game: Game2048, eps: float):
    state = game.board.copy()
    if random.random() < eps:
        # Exploration: try random directions until a valid move is found
        valid = False
        moves = [0, 1, 2, 3]
        while not valid:
            dir = moves[random.randrange(len(moves))]
            reward, ended, valid = game.swipe(dir)
            moves.remove(dir)
    else:
        # Exploitation: take the network's highest-valued action; if the move is
        # invalid, suppress it and fall back to the next best action
        action = net(net.prepareInputs(state)).squeeze(0)
        valid = False
        while not valid:
            dir = torch.argmax(action).item()
            reward, ended, valid = game.swipe(dir)
            action[dir] = torch.min(action) - 0.1
    return state, dir, reward, ended, game.board.copy()
def play_batch(self, batchsize, comparescore=0):
    n = 5000
    # Arrays for storing results
    scores = np.empty(batchsize)
    maxtiles = np.empty(batchsize)
    turns = np.empty(batchsize)
    for i in range(batchsize):
        # Start a new game
        game = Game2048()
        # Optionally pre-play the game
        # game = self.preplay_game(game, alg.runInRing, 2400)
        # Score after each turn
        turnscores = torch.empty(n)
        # For loop as a safety bound on the number of turns
        for turn in range(n):
            # Fetch the board as a vector
            gameState = torch.Tensor(game.board, device=device).view(game.n**2).unsqueeze(0)
            # Choose an action by feeding the state through the policy network and execute it
            choice = self.make_choice(gameState)
            change = game.play(choice)
            turnscores[turn] = int(game.score)
            # If the game is lost
            if change == 2:
                break
        # Store the discounted rewards
        turnscores = turnscores[:turn + 1]
        self.policy.rewards.append(self.discount_reward(turnscores))
        # Store the score, max tile and last turn
        scores[i] = game.score
        maxtiles[i] = 2**np.max(game.board)
        turns[i] = turn
    # Concatenate the rewards into a one-dimensional tensor
    self.policy.rewards = torch.cat(self.policy.rewards)
    return scores, maxtiles, turns
def maxValue(self, game, depth, max_depth):
    bestScore = -1
    bestMove = 0
    state = GameState(game.getGrid(), game.getScore(), game.isGameOver())
    for move in self.moves:
        newGame = Game2048(prev_state=state)
        if newGame.moveIsValid(move):
            newGame.move(move)
            value = self.expectedValue(newGame, depth, max_depth)
            if value > bestScore:
                bestScore = value
                bestMove = move
    return bestMove, bestScore
def playAGame(func):
    game = Game2048()
    count = 0
    while True:
        direction = func(game, count)
        count += 1
        if game.play(direction) == 2:
            maxValue = np.max(game.board)
            break
    return game.score, maxValue
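# Usage sketch (hypothetical, not from the source): playAGame expects a policy
# function taking (game, count) and returning a direction in 0-3, so a random
# baseline can be plugged in directly:
random_policy = lambda game, count: np.random.randint(4)
final_score, max_tile = playAGame(random_policy)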
def run_evaluation(paths, agent, evals=10000, with_show=True):
    # Evaluates an agent
    threadstr = "T%i" % THREAD
    scores = np.empty(evals)
    maxtiles = np.empty(evals)
    agentstr = "Agent: %s\n" % agent.displayname
    with open(paths["log_eval"], "w+", encoding="utf-8") as file_out:
        file_out.write(agentstr)
    for i in range(evals):
        # Starts a new game
        game = Game2048()
        change = 0
        while change != 2:
            choice = agent.make_a_move(game)
            change = game.play(choice)
        scores[i] = game.score
        maxtiles[i] = 2**np.max(game.board)
        print(threadstr, i, game.score, 2**np.max(game.board))
    # String with evaluation results
    resstr = "Mean score: {0}, score std: {1}\nMax tile: {2}, mean max tile: {3}\nMax tile distribution: {4}".format(
        int(np.mean(scores)), int(np.std(scores)), int(np.max(maxtiles)),
        round(np.mean(maxtiles), 2), max_tile_distribution(maxtiles))
    print(threadstr, resstr)
    # Bootstrap statistics
    boot_mu, boot_std = bootstrap(scores)
    boot_str = "BOOTSTRAP: Mean score: %i, score std: %i" % (boot_mu, boot_std)
    print(threadstr, boot_str)
    # Writes the log file
    with open(paths["log_eval"], "a", encoding="utf-8") as file_out:
        file_out.write(resstr + "\n" + boot_str + "\n")
        file_out.write("Score\tMaxtile\n")
        for s, m in zip(scores, maxtiles):
            file_out.write("%i\t%i\n" % (s, m))
    # Creates the plot
    evalplot(scores, maxtiles, agent, paths["plot_eval"], with_show=with_show)
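# The evaluation above reports bootstrap statistics through a bootstrap(scores)
# helper that is not shown in this excerpt. A minimal sketch of a standard
# bootstrap of the mean; the resample count and the (mean, std) return
# convention are assumptions, not taken from the source:
def bootstrap(samples, n_resamples=10000):
    # Resample with replacement and take the mean of each resample; the mean and
    # spread of those resample means estimate the sample mean and its uncertainty
    means = np.array([
        np.mean(np.random.choice(samples, size=len(samples), replace=True))
        for _ in range(n_resamples)
    ])
    return means.mean(), means.std()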
def expectedValue(self, game, depth, max_depth):
    state = GameState(game.getGrid(), game.getScore(), game.isGameOver())
    emptyTiles = game.getEmptyTiles()
    n_empty = len(emptyTiles)
    score = 0
    for tile in emptyTiles:
        for tileValue, prob in zip([2, 4], [0.9, 0.1]):
            newGame = Game2048(prev_state=state)
            newGame.placeTile(tile, tileValue)
            if depth < max_depth:
                move, value = self.maxValue(newGame, depth + 1, max_depth)
            else:
                value = self.evaluate(newGame)
            score += (prob / n_empty) * value
    return score
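# maxValue and expectedValue together form one ply of expectimax: maxValue
# maximises over the four player moves, while expectedValue averages over every
# possible tile spawn, weighting a 2-tile at 0.9 and a 4-tile at 0.1. A usage
# sketch (hypothetical, not from the source) for picking the next move from the
# current position, assuming `agent` is an instance of the class above and a
# search depth chosen by the caller:
best_move, best_value = agent.maxValue(game, depth=0, max_depth=3)
game.move(best_move)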
def nextMoveRecur(self, game, depth, max_depth, base=0.9):
    bestScore = -1
    bestMove = 0
    state = GameState(game.getGrid(), game.getScore(), game.isGameOver())
    for move in self.moves:
        newGame = Game2048(prev_state=state)
        if newGame.moveIsValid(move):
            newGame.move(move)
            score = self.evaluate(newGame)
            if depth <= max_depth:
                my_move, my_score = self.nextMoveRecur(newGame, depth + 1, max_depth)
                score += my_score * pow(base, depth + 1)
            if score > bestScore:
                bestMove = move
                bestScore = score
    return (bestMove, bestScore)
async def on_message(message):
    if not message.content.startswith('!2048'):
        return
    ch = message.channel
    banmen = None
    if message.content == '!2048 edit':
        await ch.send('Please send the board')
        msg = await client.wait_for(
            'message',
            check=lambda m: m.author.id == message.author.id and m.channel.id == message.channel.id,
            timeout=60)
        banmen = list(map(lambda x: list(map(int, x.split())), msg.content.split('\n')))
    game = Game2048(banmen)
    before = await send_image(ch, game)
    before2 = None
    while game.has_0():
        before2 = await ch.send('Enter your move (w/a/s/d, q to quit)')
        try:
            msg = await client.wait_for(
                'message',
                check=lambda m: m.author.id == message.author.id and m.channel.id == message.channel.id and m.content in ['w', 'a', 's', 'd', 'q'],
                timeout=60)
        except Exception:
            return
        content = msg.content
        if content == 'q':
            await ch.send('Quit.')
            return
        if content == 'w':
            game.up()
        if content == 'a':
            game.left()
        if content == 's':
            game.down()
        if content == 'd':
            game.right()
        game.set_2()
        await before.delete()
        await before2.delete()
        before = await send_image(ch, game)
    await ch.send('You lose.')
def getBoard(self):
    """ Gets the tile value for each tile in the grid """
    self.window = ImageGrab.grab()
    newGrid = [0 for _ in range(16)]
    for index, coord in enumerate(TILE_COORDINATES):
        try:
            newGrid[index] = self.getTileValue(coord)
            state = GameState(newGrid, 0, self.gameOver)
            self.game = Game2048(prev_state=state)
        except KeyError:
            print(self.window.getpixel(coord))
            self.gameOver = True
            break
    return self.game, self.gameOver
def playbatch(self, idx, ngames=100):
    newweights = [w.clone().detach() for w in self.weights]
    scores = []
    maxtiles = []
    for i in range(ngames):
        directions = []
        logps = []
        game = Game2048()
        # Play one game
        for k in range(100):
            logp = self.feedforward(torch.Tensor(game.board.reshape(game.n**2)), newweights)
            while True:
                with torch.no_grad():
                    direction = self.sampler(logp)
                change = game.play(direction)
                if change in (1, 2):
                    break
            directions.append(direction)
            logps.append(logp)
            if change == 2:
                break
        reward = self.reward(game.score)
        for k in range(len(directions)):
            y = torch.zeros(game.n)
            y[directions[k]] = reward
            logps[k].backward(y)
        with torch.no_grad():
            # Apply the accumulated, normalised gradients to the candidate weights,
            # then reset the gradients for the next game
            for layer in range(self.nlayers - 1):
                newweights[layer] += self.eta * reward * self.weights[layer].grad / self.weights[layer].grad.norm()
                self.weights[layer].grad.zero_()
        scores.append(game.score)
        maxtiles.append(game.board.max())
    return newweights, np.mean(scores), np.mean(maxtiles)
def playBatch(self, batchSize=50):
    # Arrays for storing results
    scores = np.zeros(batchSize)
    maxtiles = np.zeros(batchSize)
    turns = np.zeros(batchSize)
    for i in range(batchSize):
        # Start a new game
        game = Game2048()
        oldscore = 0
        # For loop as a safety bound on the number of turns
        for turn in range(5000):
            # Fetch the board as a vector
            gameState = torch.Tensor(game.board).view(16).unsqueeze(0)
            # Choose an action by feeding the state through the policy network
            choice = self.makeChoice(gameState)
            # Execute the chosen action
            change = game.play(choice)
            # Compute the reward as the change in score
            score = game.score
            reward = score - oldscore
            # Store the reward
            self.policy.rewardList.append(reward)
            # If the game is lost
            if change == 2:
                break
            oldscore = score
        # Store the score, max tile and last turn
        scores[i] = game.score
        maxtiles[i] = 2**np.max(game.board)
        turns[i] = turn
        # When the game is finished, perform the policy update
        self.doPolicyUpdate()
    return scores, maxtiles, turns
def __init__(self, n=4, seed=None):
    pygame.init()
    # Info on dimensions
    self.n = n
    self.tile_side = 130
    self.margin = 10
    self.score_board_height = 40
    self.width = self.tile_side * self.n + self.margin
    self.height = self.width + self.score_board_height
    self.size = (self.width, self.height)
    # The game screen
    pygame.display.set_caption("2048")
    self.screen = pygame.display.set_mode(self.size)
    # Game core
    self.game = Game2048(self.n, seed)
    self.on_end_screen = False
def play_batch(self, batchsize):
    n = 5000
    # Arrays to save results
    scores = np.empty(batchsize)
    maxtiles = np.empty(batchsize)
    turns = np.empty(batchsize)
    propturns = np.empty(batchsize)
    for i in range(batchsize):
        # Starts a game
        game = Game2048(deterministic=self.params["determinism"])
        rewarder = self.params["rewarder"]
        for turn in range(n):
            # Decides an action based on a feed forward through the policy network and executes it
            change = self.make_choice(game)
            # The reward is computed and saved in the rewarder class
            rewarder.reward(game, turn)
            # If the game is lost
            if change == 2:
                break
        rewards = rewarder.final_reward(game, turn)
        # Saves the rewards
        self.rewards.append(torch.Tensor(rewards))
        rewarder.clear()
        # Saves the score, max tile and the last turn of the game
        scores[i] = int(game.score)
        maxtiles[i] = 2 ** int(np.max(game.board))
        turns[i] = game.moves
        propturns[i] = game.propermoves
    # Reshapes the rewards into a one-dimensional tensor
    self.rewards = torch.cat(self.rewards)
    return scores, maxtiles, turns, propturns, rewards
def gamedriver(stdscr):
    curses.noecho()
    highscore = 0

    def redraw(state):
        stdscr.clear()
        stdscr.addstr(0, 0, 'High score: {0}'.format(highscore))
        stdscr.addstr(1, 0, 'Current score: {0}'.format(g.score))
        for y in range(4):
            for x in range(4):
                stdscr.addstr(2 + y, x * 5, str(state[y][x]))

    keys = {
        curses.KEY_DOWN: 'down',
        curses.KEY_UP: 'up',
        curses.KEY_LEFT: 'left',
        curses.KEY_RIGHT: 'right',
    }
    overrides = {
        ord('u'): lambda: g.undo(),
    }
    while True:
        g = Game2048()
        redraw(g.state)
        try:
            while True:
                key = stdscr.getch()
                if key in overrides:
                    overrides[key]()
                    redraw(g.state)
                elif key in keys:
                    g.move(keys[key])
                    highscore = max(highscore, g.score)
                    redraw(g.state)  # rerender
        except Game2048.GameOver:
            stdscr.addstr(5, 0, 'Game over. Press ENTER to start a new game.')
            while stdscr.getch() not in [curses.KEY_ENTER, ord('\n'), ord('\r')]:
                pass
def __init__(self, evaluation_mode: bool = False):
    super().__init__()
    self._evaluation_mode = evaluation_mode
    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
    self._observation_spec = array_spec.BoundedArraySpec(
        shape=(4, 4, 1), dtype=np.float32, minimum=1, maximum=12, name='observation')
    self._state = np.zeros((4, 4, 1))
    self._episode_ended = False
    self._moves = []
    self.best_score = 0
    if self._evaluation_mode:
        self.station = Station(Game2048())
        self._moves.append(lambda: self.station.game_window().send_keys(Keys.UP))
        self._moves.append(lambda: self.station.game_window().send_keys(Keys.RIGHT))
        self._moves.append(lambda: self.station.game_window().send_keys(Keys.DOWN))
        self._moves.append(lambda: self.station.game_window().send_keys(Keys.LEFT))
    else:
        self.game = Game2048Mem(Board())
        self.game.start()
        self._moves.append(lambda: self.game.link_keys(0))
        self._moves.append(lambda: self.game.link_keys(1))
        self._moves.append(lambda: self.game.link_keys(2))
        self._moves.append(lambda: self.game.link_keys(3))
    self.score = 0
import sys
import numpy as np
from qtest import QNN, Agent
from game import Game2048
from utilities import create_one_hot_repr
import reward_functions as rf

if __name__ == '__main__':
    agent = Agent(gamma=0.9, epsilon=1.0, lr=0.003, max_memory=5000, replace=None)
    while agent.memory_stored < agent.max_memory:
        game = Game2048()
        rewarder = rf.ScoreChange()
        state = create_one_hot_repr(game.board)
        change = 1
        i = 0
        while change != 2:
            action = np.random.randint(4)
            change = game.play(action)
            state_new = create_one_hot_repr(game.board)
            rewarder.reward(game, i)
            reward = rewarder.rewards[i]
            i += 1
def __init__(self):
    self.game = Game2048()
    self.gameOver = False
    self.initializeGame()
def create_one_game(self):
    """Generate a new game instance"""
    return Game2048(task_name=self.result_path, game_mode=False)
'''
Main file of the game: implementation of the interface with pygame
'''
import pygame

from game import Game2048
from pygame_window import PyGameWindow

if __name__ == '__main__':
    pygame.init()
    game2048 = Game2048()
    pygameWindow = PyGameWindow(game2048)
    pygameWindow.launch()
    pygame.quit()
def __init__(self, agent=None, timestep=.5, **kwargs):
    super().__init__(**kwargs)
    self.agent = agent
    self.timestep = timestep
    self.game = Game2048()
def train(model, episodes=100, ckpt=None, manager=None):
    big_tic = time.time()
    game = Game2048(seed=1)
    memory = Memory()

    # Track progress
    scores = []
    highest_tiles = []
    steps_list = []

    # If ckpt and manager were passed, set flag to save training checkpoints
    save_ckpts = ckpt is not None and manager is not None

    # Aux function to print the training log and append it to a CSV file
    def print_data(data):
        for item in data:
            print((str(item) + '\t').expandtabs(15), end='')
        print("")
        with open('training_log.csv', 'a', newline='') as f:
            csv.writer(f, delimiter='\t').writerow(data)

    for episode in range(episodes):
        if episode % 100 == 0:
            print_data(["Episode", "Time", "Reward", "Score", "Highest",
                        "L", "U", "R", "D", "Steps"])

        # Reinitialize game and progress-tracking variables
        tic = time.time()
        game.new_game()
        _, observation = game.current_state()
        observation = preprocess_obs(observation)
        memory.clear()
        action_history = [0, 0, 0, 0]
        old_score = 0
        steps = 0

        while True:
            # Select a feasible action based on the model, and perform it in the game
            action = choose_action(model, observation, np.array(game.possible_moves()))
            next_observation, score, done, tiles_merged = game.step(action)

            # TODO: Rethink how the reward is obtained. Maybe getting the score at each
            # step is not the best strategy. Other possibilities are: getting the final
            # score of the game; getting the final sum of tiles; getting the difference
            # between the sum of tiles now and in the previous step; or a mixture of the
            # mentioned strategies. Maybe use metrics from the preprocessed observations
            # instead of the raw ones. Needs more experimentation.
            # reward1 = min((score - old_score)/1024, 1)
            # reward2 = min(tiles_merged/4, 1)
            # reward = 0.7*reward1 + 0.3*reward2
            reward = score - old_score

            next_observation = preprocess_obs(next_observation)
            old_score = score
            memory.add_to_memory(observation, action, reward)
            observation = next_observation
            action_history[action] += 1
            steps += 1

            # Train the model at the end of each episode
            if done:
                # Calculate the total reward of the episode and store it in the history
                total_reward = sum(memory.rewards)
                scores.append(score)
                highest_tile = int(2**np.max(observation))
                highest_tiles.append(highest_tile)
                steps_list.append(steps)
                time_since_start = time.time() - big_tic
                if time_since_start < 100:
                    elapsed_time = "{:.1f}s (+{:.1f}s)".format(time_since_start, time.time() - tic)
                else:
                    elapsed_time = "{}s (+{:.1f}s)".format(int(np.round(time_since_start)), time.time() - tic)
                print_data([episode, elapsed_time, total_reward, score, highest_tile,
                            *action_history, steps])

                # Train the model using the stored memory
                train_step(model, optimizer,
                           observations=np.vstack(memory.observations),
                           actions=np.array(memory.actions),
                           discounted_rewards=discount_rewards(memory.rewards))

                # Save a training checkpoint every 1000th episode
                if save_ckpts and (episode + 1) % 1000 == 0:
                    save_path = manager.save()
                    # print("Saved checkpoint for episode {}: {}\n".format(episode, save_path))

                memory.clear()
                break

    big_elapsed = int(time.time() - big_tic)
    print("\nTotal training time: {}s\n".format(big_elapsed))
    return model, [scores, highest_tiles, steps_list]
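# The training loop above hands the episode rewards to a discount_rewards helper
# that is not shown in this excerpt. A minimal sketch of what such a helper
# commonly looks like; the discount factor and the per-episode normalisation are
# assumptions, not taken from the source:
def discount_rewards(rewards, gamma=0.99, eps=1e-8):
    # Walk backwards through the episode, accumulating the discounted return
    discounted = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        discounted[t] = running
    # Normalise to zero mean and unit variance to stabilise the policy gradient
    return (discounted - discounted.mean()) / (discounted.std() + eps)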
def restart_game(self, dt):
    self.game = Game2048()
    self.main_loop()
def TestGame():
    print('Running Game2048 tests...')
    passes = 0
    fails = 0
    cases = [
        {  # 0
            'board': [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 0, 'ended': False, 'valid': False
        },
        {  # 1
            'board': [[0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 0, 'ended': False, 'valid': True
        },
        {  # 2
            'board': [[0, 0, 0, 0], [0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4, 'ended': False, 'valid': True
        },
        {  # 3
            'board': [[0, 0, 0, 0], [0, 1, 1, 2], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4, 'ended': False, 'valid': True
        },
        {  # 4
            'board': [[0, 0, 0, 0], [1, 0, 1, 2], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4, 'ended': False, 'valid': True
        },
        {  # 5
            'board': [[0, 0, 0, 0], [1, 0, 1, 2], [2, 0, 2, 3], [0, 0, 0, 0]],
            'score': 12, 'ended': False, 'valid': True
        },
        {  # 6
            'board': [[0, 0, 0, 0], [0, 1, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4, 'ended': False, 'valid': True
        },
        {  # 7
            'board': [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
            'score': 0, 'ended': False, 'valid': False
        },
        {  # 8
            'board': [[0, 0, 0, 0], [0, 10, 0, 10], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 2048, 'ended': True, 'valid': True
        },
        {  # 9
            'board': [[4, 4, 3, 4], [4, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7]],
            'score': -11, 'ended': True, 'valid': True
        },
        {  # 10
            'board': [[0, 0, 0, 0], [1, 1, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 8, 'ended': False, 'valid': True
        },
    ]
    game = Game2048()
    for i in range(len(cases)):
        case = cases[i]
        game.reset()
        game.board = np.array(case['board'])
        score, ended, valid = game.swipe(2)
        if score != case['score'] or ended != case['ended'] or valid != case['valid']:
            print(
                f'FAIL: Test {i}, got: {score} ({ended}, {valid}), '
                f'expected {case["score"]} ({case["ended"]}, {case["valid"]})'
            )
            print(game.boardAsString())
            fails += 1
        else:
            passes += 1