Code example #1
def create_data(agent, testsize=40000):

    create_time = datetime.now().strftime('%m%d-%H%M%S')

    boards = np.empty((testsize, 256))
    moves = np.empty((testsize, 4))

    change = 0
    game = Game2048()
    for i in range(testsize):
        try:
            if change == 2:  # game over: log the final score and start a new game
                print(THREADSTR, "%i\t%i" % (i, game.score))
                game = Game2048()

            direction = agent.make_a_move(game)
            boards[i] = utl.create_one_hot_repr(game.board)

            move = np.zeros(4)
            move[direction] = 1

            moves[i] = move

            change = game.play(direction)
        except KeyboardInterrupt:
            boards = boards[:i]
            moves = moves[:i]
            i -= 1
            break

    print(THREADSTR, "Datapunkter genereret:", i + 1)
    path = realpath('../pretraindata/%s_t%i_pretrain_%s' %
                    (create_time, THREAD, agent.displayname))
    np.savez(path, boards, moves)
    return path
Code example #2
File: main.py Project: Kevsnz/PyTorch-2048
def playThroughStdin(game: Game2048):
    # 0 - left, 1 - up, 2 - right, 3 - down
    directions = {'W': 1, 'S': 3, 'A': 0, 'D': 2}

    game.reset()
    moveCount = 0

    try:
        while True:
            print(game.boardAsString())
            inStr = input('Enter direction of swipe: ')
            if inStr.upper() not in directions:
                print('Unknown input, use W/A/S/D...')
                continue
            dir = directions[inStr.upper()]
            score, ended, valid = game.swipe(dir)

            if not valid:
                print('Invalid move, try again...')
                continue

            moveCount += 1

            if ended:
                print(
                    f'Game Over. Score: {2**game.score} after {moveCount} turns.'
                )
                return
            else:
                print(f'Swipe #{moveCount} gave {score} points.')

    except KeyboardInterrupt:
        print('Game aborted.')
Code example #3
File: env.py Project: OuYanghaoyue/yueyue_2048_DQN
    def __init__(self, size_board, seed=None):
        self.__size_board = size_board
        self.__game = Game2048(size_board)

        self.__zeros = 2.7
        self.__smooth = 1.1
        self.__var = -1.1
        self.__weight = 0.3

        # Number of possible actions
        self.action_space = spaces.Discrete(4)

        # Observation space: one value per board cell
        self.observation_space = spaces.Box(
            0, 2**16, (size_board * size_board, ), dtype=np.int64)

        # Reward range
        self.reward_range = (0., np.inf)

        # Initialise seed
        self.np_random, seed = seeding.np_random(seed)

        # Legends
        self.__actions_legends = {0: "UP", 1: "DOWN", 2: "RIGHT", 3: "LEFT"}

        # Old max
        self.__old_max = 0

        # Debug
        self.__last_action = None
        self.__last_scores_move = None

        self.valid_movements = []
Code example #4
    def playbatch(self, ngames=50):

        scores = np.empty(ngames)

        running_reward = 10
        for g in range(ngames):
            # Start a new game
            game = Game2048()
            c = _Counter()
            oldscore = 0
            while True:
                c.count()
                # Take one turn
                action = self.select_action(game)
                change = game.play(action)
                while change == 0:
                    action = self.select_action(game, False)
                    change = game.play(action)
                self.policy.episode_rewards.append(game.score - oldscore)
                oldscore = game.score

                # When the game is over
                if change == 2:
                    break

            running_reward = running_reward * self.gamma + c.c * .01
            scores[g] = game.score
            self.finish_episode()

        return np.mean(scores)
Code example #5
File: CLIplayTest.py Project: sorenmulli/alpha2048
def run():

    game = Game2048()

    while True:
        prettyBoard = (2**game.board)
        prettyBoard[game.board == 0] = 0

        print(prettyBoard)

        try:
            direction = int(
                input(
                    "Angiv retning, du vil bevæge spillerpladen i: \n \t(0 for venstre, 1 for op, 2 for højre, 3 for ned)\n"
                ))

            if game.play(direction) == 2:
                break

            print("\n\tScore: {0}\n".format(game.score))

        except Exception:
            print("\tERROR: Your input stinks")

    print("Desværre, basse, du tabte. Din score blev {0}".format(game.score))

    pass
Code example #6
File: CLIplayTest.py Project: sorenmulli/alpha2048
def run():

    game = Game2048(deterministic=[True, True])

    while True:
        prettyBoard = (2**game.board)
        prettyBoard[game.board == 0] = 0

        print(prettyBoard)
        print(game.score)
        try:
            direction = int(
                input(
                    "Angiv retning, du vil bevæge spillerpladen i: \n \t(0 for venstre, 1 for op, 2 for højre, 3 for ned)\n"
                ))
            if direction == -1:
                # -1 quits; IndentationError is (ab)used as a quit sentinel below
                raise IndentationError

            if game.play(direction) == 2:
                break

            print("\n\tScore: {0}\n".format(game.score))
        except IndentationError:
            import sys
            sys.exit()
        except Exception:
            print("\tERROR: Your input stinks")

    print("Desværre, basse, du tabte. Din score blev {0}".format(game.score))

    pass
Code example #7
def playBatch(W1, W2, learningRate, batchSize = 100, lastmean=0):
	score, maxTiles = np.empty(batchSize), np.empty(batchSize)
	newW1, newW2 = torch.clone(W1).detach(), torch.clone(W2).detach()
	
	newW1.requires_grad = False
	newW2.requires_grad = False
	scoremean = 0
	
	for i in range(batchSize):
		game = Game2048()
		directions = list()
		prob = list()
		while True:
			logp = policyNetwork(torch.Tensor(game.board).view(16), W1, W2)
			with torch.no_grad():
				direction = weightedChoice(logp)


			change = game.play(direction)
			while True:
				if change != 0:
					break
				else:
					with torch.no_grad():
						direction = weightedChoice(logp)
					change = game.play(direction)

			if change == 2:
				break
			
			prob.append(logp)
			directions.append(direction)


		maxValue = np.max(game.board)
		reward = int(rewardFunction(game.board, game.score, scoremean))

		for j in range(len(directions)):
			gradients = torch.Tensor([0, 0, 0, 0])
			gradients[directions[j]] = reward
			prob[j].backward(gradients)
			


		with torch.no_grad():
			newW1 += reward * learningRate * W1.grad
			newW2 += reward * learningRate * W2.grad
			# print(W1.mean(), W2.mean())
			# print(W1[W1>0].size(), W2[W2>0].size())
			# print(W1)
			W1.grad.zero_()
			W2.grad.zero_()

		score[i] = game.score
		maxTiles[i] = maxValue

		# scoremean = 

	return score, maxTiles, newW1, newW2
Code example #8
File: agent_player.py Project: Kevsnz/PyTorch-2048
    def _makeTurn(self, net: AgentNet, game: Game2048, eps: float):
        state = game.board.copy()

        if random.random() < eps:
            # Exploration: try random directions until one produces a valid move
            valid = False
            moves = [0, 1, 2, 3]
            while not valid:
                dir = moves[random.randrange(len(moves))]
                reward, ended, valid = game.swipe(dir)
                moves.remove(dir)
        else:
            # Exploitation: greedily pick the best action, masking out invalid ones
            action = net(net.prepareInputs(state)).squeeze(0)
            valid = False
            while not valid:
                dir = torch.argmax(action).item()
                reward, ended, valid = game.swipe(dir)
                action[dir] = torch.min(action) - 0.1

        return state, dir, reward, ended, game.board.copy()
Code example #9
    def play_batch(self, batchsize, comparescore=0):

        n = 5000

        # Arrays for storing results
        scores = np.empty(batchsize)
        maxtiles = np.empty(batchsize)
        turns = np.empty(batchsize)

        for i in range(batchsize):

            # Start a new game
            game = Game2048()

            # Pre-play the game (disabled)
            #game = self.preplay_game(game, alg.runInRing, 2400)

            # Score after each turn
            turnscores = torch.empty(n)

            # Bounded for-loop as a safety measure
            for turn in range(n):
                # Fetch the game board as a vector
                gameState = torch.Tensor(game.board, device=device).view(
                    game.n**2).unsqueeze(0)

                # Pick a move via a feed-forward pass through the policy network and execute it
                choice = self.make_choice(gameState)
                change = game.play(choice)

                turnscores[turn] = int(game.score)

                # If the game is lost
                if change == 2:
                    break
            # Save the discounted rewards
            turnscores = turnscores[:turn + 1]
            # self.policy.rewards.append(turnscores)
            self.policy.rewards.append(self.discount_reward(turnscores))

            # Save score, max tile and last turn
            scores[i] = game.score
            maxtiles[i] = 2**np.max(game.board)
            turns[i] = turn

        # Flatten the rewards into a one-dimensional tensor
        self.policy.rewards = torch.cat(self.policy.rewards)

        # import matplotlib.pyplot as plt
        # plt.plot(self.policy.rewards.numpy())
        # plt.show()
        return scores, maxtiles, turns
Code example #10
 def maxValue(self, game, depth, max_depth):
     bestScore = -1
     bestMove = 0
     state = GameState(game.getGrid(), game.getScore(), game.isGameOver())
     for move in self.moves:
         newGame = Game2048(prev_state=state)
         if newGame.moveIsValid(move):
             newGame.move(move)
             value = self.expectedValue(newGame, depth, max_depth)
             if value > bestScore:
                 bestScore = value
                 bestMove = move
     return bestMove, bestScore
Code example #11
File: algorithms.py Project: sorenmulli/alpha2048
def playAGame(func):
	game = Game2048()
	count = 0

	while True:

		direction = func(game, count)
		count += 1

		if game.play(direction) == 2:
			maxValue = np.max(game.board)
			break

	return game.score, maxValue
Code example #12
File: main.py Project: sorenmulli/alpha2048
def run_evaluation(paths, agent, evals=10000, with_show=True):

    # Evaluates an agent

    threadstr = "T%i" % THREAD
    scores = np.empty(evals)
    maxtiles = np.empty(evals)

    agentstr = "Agent: %s\n" % agent.displayname
    file_out = open(paths["log_eval"], "w+", encoding="utf-8")
    file_out.write(agentstr)
    file_out.close()

    for i in range(evals):
        # Starts new game
        game = Game2048()
        change = 0

        while change != 2:
            choice = agent.make_a_move(game)
            change = game.play(choice)

        scores[i] = game.score
        maxtiles[i] = 2**np.max(game.board)
        print(threadstr, i, game.score, 2**np.max(game.board))

    # String with evaluation results
    resstr = "Gns. score: {0}, std. på score: {1}\nMaxtile: {2}, gns. maxtile: {3}\nFord. af maxtile: {4}".format(
        int(np.mean(scores)), int(np.std(scores)), int(np.max(maxtiles)),
        round(np.mean(maxtiles), 2), max_tile_distribution(maxtiles))
    print(threadstr, resstr)

    # Bootstrap statistics
    boot_mu, boot_std = bootstrap(scores)
    boot_str = "BOOTSTRAP: Gns. score: %i, std. på score: %i" % (boot_mu,
                                                                 boot_std)

    print(threadstr, boot_str)

    # Writes log file
    file_out = open(paths["log_eval"], "a", encoding="utf-8")
    file_out.write(resstr + "\n" + boot_str + "\n")
    file_out.write("Score\tMaxtile\n")
    for s, m in zip(scores, maxtiles):
        file_out.write("%i\t%i\n" % (s, m))
    file_out.close()

    # Creates plot
    evalplot(scores, maxtiles, agent, paths["plot_eval"], with_show=with_show)
Code example #13
 def expectedValue(self, game, depth, max_depth):
     state = GameState(game.getGrid(), game.getScore(), game.isGameOver())
     emptyTiles = game.getEmptyTiles()
     n_empty = len(emptyTiles)
     score = 0
     for tile in emptyTiles:
         for tileValue, prob in zip([2, 4], [0.9, 0.1]):
             newGame = Game2048(prev_state=state)
             newGame.placeTile(tile, tileValue)
             if depth < max_depth:
                 move, value = self.maxValue(newGame, depth + 1, max_depth)
             else:
                 value = self.evaluate(newGame)
             score += (prob / n_empty) * value
     return score
Code example #14
 def nextMoveRecur(self, game, depth, max_depth, base=0.9):
     bestScore = -1
     bestMove = 0
     state = GameState(game.getGrid(), game.getScore(), game.isGameOver())
     for move in self.moves:
         newGame = Game2048(prev_state=state)
         if (newGame.moveIsValid(move)):
             newGame.move(move)
             score = self.evaluate(newGame)
             if depth <= max_depth:
                 my_move, my_score = self.nextMoveRecur(
                     newGame, depth + 1, max_depth)
                 score += my_score * pow(base, depth + 1)
             if score > bestScore:
                 bestMove = move
                 bestScore = score
     return (bestMove, bestScore)
Code example #15
File: main.py Project: sizumita/2048-bot
async def on_message(message):
    if not message.content.startswith('!2048'):
        return
    ch = message.channel
    banmen = None
    if message.content == '!2048 edit':
        await ch.send('Please send the board')
        msg = await client.wait_for(
            'message',
            check=lambda m: m.author.id == message.author.id
            and m.channel.id == message.channel.id,
            timeout=60)
        banmen = list(
            map(lambda x: list(map(int, x.split())), msg.content.split('\n')))
    game = Game2048(banmen)
    before = await send_image(ch, game)
    before2 = None
    while game.has_0():
        before2 = await ch.send('Enter your move (w/a/s/d, q to quit)')
        try:
            msg = await client.wait_for(
                'message',
                check=lambda m: m.author.id == message.author.id
                and m.channel.id == message.channel.id
                and m.content in ['w', 'a', 's', 'd', 'q'],
                timeout=60)
        except Exception:
            return
        content = msg.content
        if content == 'q':
            await ch.send('Quitting.')
            return
        if content == 'w':
            game.up()
        if content == 'a':
            game.left()
        if content == 's':
            game.down()
        if content == 'd':
            game.right()
        game.set_2()
        await before.delete()
        await before2.delete()
        before = await send_image(ch, game)
    await ch.send('You lose.')
Code example #16
    def getBoard(self):
        """
        Gets the tile value for each tile in the grid
        """
        self.window = ImageGrab.grab()

        newGrid = [0 for _ in range(16)]

        for index, coord in enumerate(TILE_COORDINATES):
            try:
                newGrid[index] = self.getTileValue(coord)
                state = GameState(newGrid, 0, self.gameOver)
                self.game = Game2048(prev_state=state)
            except KeyError:
                print(self.window.getpixel(coord))
                self.gameOver = True
                break
        return self.game, self.gameOver
Code example #17
File: asgertest.py Project: sorenmulli/alpha2048
    def playbatch(self, idx, ngames=100):

        newweights = [w.clone().detach() for w in self.weights]
        scores = []
        maxtiles = []

        for i in range(ngames):

            directions = []
            logps = []
            game = Game2048()
            # Play one game
            for k in range(100):
                logp = self.feedforward(
                    torch.Tensor(game.board.reshape(game.n**2)), newweights)
                while True:
                    with torch.no_grad():
                        direction = self.sampler(logp)
                        change = game.play(direction)
                        if change in (1, 2):
                            break

                directions.append(direction)
                logps.append(logp)
                if change == 2:
                    break

            reward = self.reward(game.score)

            for k in range(len(directions)):
                y = torch.zeros(game.n)
                y[directions[k]] = reward
                logps[k].backward(y)

            with torch.no_grad():
                for layer in range(self.nlayers - 1):
                    newweights[layer] += self.eta * reward * self.weights[
                        layer].grad / self.weights[layer].grad.norm()
                    # Reset the accumulated gradients before the next game
                    self.weights[layer].grad.zero_()

            scores.append(game.score)
            maxtiles.append(game.board.max())

        return newweights, np.mean(scores), np.mean(maxtiles)
Code example #18
    def playBatch(self, batchSize=50):
        # Arrays for storing results
        scores = np.zeros(batchSize)
        maxtiles = np.zeros(batchSize)
        turns = np.zeros(batchSize)

        for i in range(batchSize):
            # Start a new game
            game = Game2048()
            oldscore = 0

            # Bounded for-loop as a safety measure
            for turn in range(5000):

                # Fetch the game board as a vector
                gameState = torch.Tensor(game.board).view(16).unsqueeze(0)

                # Pick a move via a feed-forward pass through the policy network
                choice = self.makeChoice(gameState)

                # Execute the move
                change = game.play(choice)

                # Fetch the score and compute the reward as the change in score
                score = game.score
                reward = score - oldscore

                # Save the reward
                self.policy.rewardList.append(reward)

                # If the game is lost
                if change == 2: break
                oldscore = score

            # Save score, max tile and last turn once the game has ended
            scores[i] = game.score
            maxtiles[i] = 2**np.max(game.board)
            turns[i] = turn

            # When the game is over, perform the policy update
            self.doPolicyUpdate()

        return scores, maxtiles, turns
Code example #19
    def __init__(self, n=4, seed=None):
        pygame.init()

        # Info on dimensions
        self.n = n
        self.tile_side = 130
        self.margin = 10
        self.score_board_height = 40
        self.width = self.tile_side * self.n + self.margin
        self.height = self.width + self.score_board_height
        self.size = (self.width, self.height)

        # The game screen
        pygame.display.set_caption("2048")
        self.screen = pygame.display.set_mode(self.size)

        # Game core
        self.game = Game2048(self.n, seed)

        self.on_end_screen = False
Code example #20
File: learner.py Project: sorenmulli/alpha2048
	def play_batch(self, batchsize):

		n = 5000
		
		# Arrays to save results
		scores = np.empty(batchsize)
		maxtiles = np.empty(batchsize)
		turns = np.empty(batchsize)
		propturns = np.empty(batchsize)
		
		for i in range(batchsize):
			# Starts a game
			game = Game2048(deterministic=self.params["determinism"])
			rewarder = self.params["rewarder"]	

			for turn in range(n):
				# Decides an action based on a feed forward through the policy network and executes the action
				change = self.make_choice(game)
				# Reward is given and is saved in the reward class
				rewarder.reward(game, turn)
				# If the game is lost
				if change == 2:
					break
			
			rewards = rewarder.final_reward(game, turn)

			#Saves the reward
			self.rewards.append(torch.Tensor(rewards))
			rewarder.clear()

			# Saves the score, maxtile and the last round of the game
			scores[i] = int(game.score)
			maxtiles[i] = 2 ** int(np.max(game.board))
			turns[i] = game.moves
			propturns[i] = game.propermoves
		
		# Reshapes the rewards into a one dimensional tensor
		self.rewards = torch.cat(self.rewards)
		
		return scores, maxtiles, turns, propturns, rewards
Code example #21
File: main.py Project: jaspercb/curses-2048
    def gamedriver(stdscr):
        curses.noecho()

        highscore = 0
        def redraw(state):
            stdscr.clear()
            stdscr.addstr(0, 0, 'High score: {0}'.format(highscore))
            stdscr.addstr(1, 0, 'Current score: {0}'.format(g.score))
            for y in range(4):
                for x in range(4):
                    stdscr.addstr(2 + y, x*5, str(state[y][x]))
        keys = {
            curses.KEY_DOWN  : 'down',
            curses.KEY_UP    : 'up',
            curses.KEY_LEFT  : 'left',
            curses.KEY_RIGHT : 'right',
        }
        overrides = {
            ord('u') : lambda: g.undo(),
        }
        while True:
            g = Game2048()
            redraw(g.state)
            try:
                while True:
                    key = stdscr.getch()
                    if key in overrides:
                        overrides[key]()
                        redraw(g.state)
                    elif key in keys:
                        g.move(keys[key])
                        highscore = max(highscore, g.score)
                        redraw(g.state)
                        # rerender
            except Game2048.GameOver:
                stdscr.addstr(5, 0, 'Game over. Press ENTER to start a new game.')
                while stdscr.getch() not in [curses.KEY_ENTER, ord('\n'), ord('\r')]:
                    pass
Code example #22
    def __init__(self, evaluation_mode: bool = False):
        super().__init__()
        self._evaluation_mode = evaluation_mode
        self._action_spec = array_spec.BoundedArraySpec(shape=(),
                                                        dtype=np.int32,
                                                        minimum=0,
                                                        maximum=3,
                                                        name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(4, 4, 1),
            dtype=np.float32,
            minimum=1,
            maximum=12,
            name='observation')
        self._state = np.zeros((4, 4, 1))
        self._episode_ended = False
        self._moves = []
        self.best_score = 0

        if self._evaluation_mode:
            self.station = Station(Game2048())
            self._moves.append(
                lambda: self.station.game_window().send_keys(Keys.UP))
            self._moves.append(
                lambda: self.station.game_window().send_keys(Keys.RIGHT))
            self._moves.append(
                lambda: self.station.game_window().send_keys(Keys.DOWN))
            self._moves.append(
                lambda: self.station.game_window().send_keys(Keys.LEFT))
        else:
            self.game = Game2048Mem(Board())
            self.game.start()
            self._moves.append(lambda: self.game.link_keys(0))
            self._moves.append(lambda: self.game.link_keys(1))
            self._moves.append(lambda: self.game.link_keys(2))
            self._moves.append(lambda: self.game.link_keys(3))
        self.score = 0
Code example #23
import sys
import numpy as np

from qtest import QNN, Agent


from game import Game2048
from utilities import create_one_hot_repr
import reward_functions as rf 

if __name__ == '__main__':
	agent = Agent(gamma = 0.9, epsilon = 1.0, lr = 0.003, max_memory = 5000, replace=None)

	while agent.memory_stored < agent.max_memory:
		
		game = Game2048()
		rewarder = rf.ScoreChange()
		state = create_one_hot_repr(game.board)
		
		change = 1
		i = 0

		while change != 2:
			action = np.random.randint(4)
			change = game.play(action)
			state_new =  create_one_hot_repr(game.board)

			rewarder.reward(game, i)
			reward = rewarder.rewards[i]
			i += 1
Code example #24
 def __init__(self):
     self.game = Game2048()
     self.gameOver = False
     self.initializeGame()
Code example #25
 def create_one_game(self):
     """Generate a new game instance"""
     return Game2048(task_name=self.result_path, game_mode=False)
Code example #26
'''
Main file of the game: Implementation of the Interface with pygame
'''

import pygame
from game import Game2048
from pygame_window import PyGameWindow

if __name__ == '__main__':
    pygame.init()
    game2048 = Game2048()
    pygameWindow = PyGameWindow(game2048)
    pygameWindow.launch()
    pygame.quit()
Code example #27
File: interface.py Project: sorenmulli/alpha2048
	def __init__(self, agent = None, timestep = .5,  **kwargs):
		super().__init__(**kwargs)
		self.agent = agent
		self.timestep = timestep
		
		self.game = Game2048()
Code example #28
File: ai.py Project: arturwaquil/2048-AI
def train(model, episodes=100, ckpt=None, manager=None):

    big_tic = time.time()

    game = Game2048(seed=1)
    memory = Memory()

    # Track progress
    scores = []
    highest_tiles = []
    steps_list = []

    # If ckpt and manager were passed, set flag to save training checkpoints
    save_ckpts = ckpt is not None and manager is not None

    # Aux function to print training log
    def print_data(data):
        [print((str(item) + '\t').expandtabs(15), end='') for item in data]
        print("")
        with open('training_log.csv', 'a', newline='') as f:
            csv.writer(f, delimiter='\t').writerow(data)

    for episode in range(episodes):

        if episode % 100 == 0:
            print_data([
                "Episode", "Time", "Reward", "Score", "Highest", "L", "U", "R",
                "D", "Steps"
            ])

        # Reinitialize game and progress-tracking variables
        tic = time.time()
        game.new_game()
        _, observation = game.current_state()
        observation = preprocess_obs(observation)

        memory.clear()

        action_history = [0, 0, 0, 0]
        old_score = 0
        steps = 0

        while True:

            # Select feasible action based on the model, and perform it in the game
            action = choose_action(model, observation,
                                   np.array(game.possible_moves()))

            next_observation, score, done, tiles_merged = game.step(action)

            # TODO: Rethink how the reward is obtained. Maybe getting the score at each step
            # is not the best strategy. Other possibilities are: getting the final score of
            # the game; getting the final sum of tiles; getting the difference between the
            # sum of tiles now and in previous step; or a mixture of the mentioned strategies.
            # Maybe use metrics from the preprocessed observations instead of the raw ones.

            # # Need to experiment a bit more
            # reward1 = min((score - old_score)/1024, 1)
            # reward2 = min(tiles_merged/4, 1)
            # reward = 0.7*reward1 + 0.3*reward2
            reward = score - old_score

            next_observation = preprocess_obs(next_observation)

            old_score = score

            memory.add_to_memory(observation, action, reward)
            observation = next_observation

            action_history[action] += 1
            steps += 1

            # Train model at the end of each episode
            if done:
                # Calculate total reward of the episode and store it in the history
                total_reward = sum(memory.rewards)

                scores.append(score)

                highest_tile = int(2**np.max(observation))
                highest_tiles.append(highest_tile)

                steps_list.append(steps)

                time_since_start = time.time() - big_tic
                if time_since_start < 100:
                    elapsed_time = "{:.1f}s (+{:.1f}s)".format(
                        time_since_start,
                        time.time() - tic)
                else:
                    elapsed_time = "{}s (+{:.1f}s)".format(
                        int(np.round(time_since_start)),
                        time.time() - tic)

                print_data([
                    episode, elapsed_time, total_reward, score, highest_tile,
                    *action_history, steps
                ])

                # Train the model using the stored memory
                train_step(model,
                           optimizer,
                           observations=np.vstack(memory.observations),
                           actions=np.array(memory.actions),
                           discounted_rewards=discount_rewards(memory.rewards))

                # Save a training checkpoint every 1000 episodes
                if save_ckpts and (episode + 1) % 1000 == 0:
                    save_path = manager.save()
                    # print("Saved checkpoint for episode {}: {}\n".format(episode, save_path))

                memory.clear()
                break

    big_elapsed = int(time.time() - big_tic)
    print("\nTotal training time: {}s\n".format(big_elapsed))

    return model, [scores, highest_tiles, steps_list]
Code example #29
File: interface.py Project: sorenmulli/alpha2048
	def restart_game(self, dt):
		self.game = Game2048()			
		self.main_loop()
Code example #30
def TestGame():
    print('Running Game2048 tests...')
    passes = 0
    fails = 0

    cases = [
        {  # 0
            'board': [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 0,
            'ended': False,
            'valid': False
        },
        {  # 1
            'board': [[0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 0,
            'ended': False,
            'valid': True
        },
        {  # 2
            'board': [[0, 0, 0, 0], [0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4,
            'ended': False,
            'valid': True
        },
        {  # 3
            'board': [[0, 0, 0, 0], [0, 1, 1, 2], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4,
            'ended': False,
            'valid': True
        },
        {  # 4
            'board': [[0, 0, 0, 0], [1, 0, 1, 2], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4,
            'ended': False,
            'valid': True
        },
        {  # 5
            'board': [[0, 0, 0, 0], [1, 0, 1, 2], [2, 0, 2, 3], [0, 0, 0, 0]],
            'score': 12,
            'ended': False,
            'valid': True
        },
        {  # 6
            'board': [[0, 0, 0, 0], [0, 1, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 4,
            'ended': False,
            'valid': True
        },
        {  # 7
            'board': [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
            'score': 0,
            'ended': False,
            'valid': False
        },
        {  # 8
            'board': [[0, 0, 0, 0], [0, 10, 0, 10], [0, 0, 0, 0], [0, 0, 0,
                                                                   0]],
            'score': 2048,
            'ended': True,
            'valid': True
        },
        {  # 9
            'board': [[4, 4, 3, 4], [4, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7]],
            'score': -11,
            'ended': True,
            'valid': True
        },
        {  # 10
            'board': [[0, 0, 0, 0], [1, 1, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
            'score': 8,
            'ended': False,
            'valid': True
        },
    ]

    game = Game2048()

    for i in range(len(cases)):
        case = cases[i]
        game.reset()
        game.board = np.array(case['board'])
        score, ended, valid = game.swipe(2)
        if (score != case['score'] or ended != case['ended']
                or valid != case['valid']):
            print(
                f'FAIL: Test {i}, got: {score} ({ended}, {valid}), expected {case["score"]} ({case["ended"]}, {case["valid"]})'
            )
            print(game.boardAsString())
            fails += 1
        else:
            passes += 1