Example #1
def update_frame(x):
    global state, score, high_score, last_move, bot_mode, down_press

    if bot_mode:
        # a = policy(Variable(torch.from_numpy(state).type(torch.FloatTensor)))
        # _, ac = a.max(0)
        # action = ac.item()

        a = policy(Variable(torch.from_numpy(state).type(torch.FloatTensor)))
        # a = F.softmax(a, dim=-1)
        c = Categorical(a)
        action = c.sample()

        # action = train.select_action(state).item()

        state, reward, done = game.step(action)

        last_move = action
    else:
        state, reward, done = game.step(0)
        if down_press:
            game.active_piece, game.grid, _ = game.move_down(game.active_piece, game.grid)
            last_move = 4

    score += reward

    if done:
        game.reset()
        high_score = max(high_score, score)
        score = 0
Example #2
async def stopBJ(ctx):
	"""Stop an active game of blackjack & reset everything."""
	global game
	
	game.reset()
	game.set_game_state("STOPPED")
	await ctx.send("Awwww....how sad.  If you'd like to start a game, use __!startBJ__")
Example #3
def init():
    global state, speed, last_update

    # set the initial time for the first update to the current time
    last_update = time.time()

    game.score = 0
    game.reset()
    graphics.drawGame()
    state = 1
    try:
        speed = config.game_speed[parser.args.speed]
    except:
        speed = config.game_speed['m']

    livesIn = parser.args.lives
    if livesIn >= 1 and livesIn <= 5:
        game.lives = livesIn
        game.livesMax = livesIn
    elif livesIn > 5:
        game.lives = 5
        game.livesMax = 5
    else:
        game.lives = 1
        game.livesMax = 1
Example #4
def setup():
    if game.setuplock == 0:
        game.setuplock=1
        game.movelock=1
        game.placelock=1
        game.reset()
        game.pu()
        game.clear()
        game.pensize(5)
        game.edge=80
        game.sety(0)
        game.setx(0)
        game.title("PyTacToe")
        game.speed(0)
        game.plansza()
        game.speed(2)
        game.sety(0)
        game.setx(0)
        game.x=1
        game.y=1
        game.last=2
        print("x - 1")
        print("o - 2")
        game.f00=game.f01=game.f02=game.f10=game.f11=game.f12=game.f20=game.f21=game.f22="dummy"
        game.setuplock=0
        game.movelock=0
        game.placelock=0
Example #5
def main():
    viewPort = viewport.ViewPort(WINWIDTH, WINHEIGHT, topLeft=Point(400, 80))
    game = CandySeller(viewPort)

    while True:
        game.run()
        # Re-initialise the game state.
        game.reset()
Example #6
def main():
    viewPort = viewport.ViewPort(WINWIDTH, WINHEIGHT)
    game = SheriffQuest(viewPort)

    while True:
        game.run()
        # Re-initialise the game state.
        game.reset()
Example #7
def main():
    print("Starting Jimmy Pixel...")
    viewPort = viewport.ViewPort(WINWIDTH, WINHEIGHT, topLeft=Point(400, 80))
    game = JimmyPixel(viewPort)
    print("Created Jimmy Pixel game...")

    while True:
        game.run()
        # Re-initialise the game state.
        game.reset()
Example #8
def run():
    try:
        parser.init()
        stage.init()
        graphics.init()
        theme.init()
        game.reset()
        gameloop.start()

    except KeyboardInterrupt:
        exit()
Example #9
def run():
    try:
        parser.init()
        stage.init()
        graphics.init()
        theme.init()
        game.reset()
        gameloop.start()

    except KeyboardInterrupt:
        exit()
Example #10
def on_key_press(symbol, modifiers):
    if symbol == key.LEFT:
        game.batE.status = "PRESS"

    elif symbol == key.RIGHT:
        game.batD.status = "PRESS"

    elif symbol == key.SPACE:
        game.molaS = 'PRESS'

    if game.status == "GAME OVER":
        if symbol == key.ENTER:
            game.reset()
Example #11
    def init_random_exp_memory(self, size):
        if size > self.memory_size:
            size = self.memory_size

        game = self.get_game()
        self.exp_memory.add(game.get_state(), 0, 0, 0)
        for i in range(size):
            random_action = np.random.randint(0, self.num_actions)
            reward, is_terminal = game.execute_action(random_action)
            state = game.get_state()
            self.exp_memory.add(state, random_action, reward, is_terminal)
            if is_terminal:
                game.reset()
                self.exp_memory.add(game.get_state(), 0, 0, 0)
Example #12
async def handleGame(data):
    if (data['text'] == 'join' and game.game_on == False):
        playerNum = game.add_player(data['sender'])
        data['text'] = f'join{str(playerNum)}'
        await notify_public_message(data)
    elif (data['text'] == 'join' and game.game_on == True):
        data = game.add_player(data['sender'])
        await notify_client(data)
    elif (data['text'] == 'ready'):
        data['text'] = 'start'
        data['sender'] = 'game'
        await notify_public_message(data)
        cards = game.deal_3(1)
        await notify_client(cards)
        cards = game.deal_3(2)
        await notify_client(cards)
    elif (data['text'] == 'move'):
        is_bomb = False
        if (len(data['data'])>3):
            is_bomb = True
        data = game.process_message(data)
        await notify_public_message(data)
        if (not is_bomb):
            player = game.players.index(game.whos_turn)+1
            cards = game.deal_1(player)
            await notify_client(cards)
        win_state = game.check_win_state()
        if (win_state[0]):
            print('win')
            data = game.create_win_message(win_state[1])
            await notify_public_message(data)
            data = game.reset()
            await notify_public_message(data)
        else:
            game.switch_turn()
Example #13
def on_key_press(symbol, modifiers):
    if symbol == key.LEFT:
        game.batE.status = "PRESS"

    elif symbol == key.RIGHT:
        game.batD.status = "PRESS"

    elif symbol == key.SPACE:
        game.game_start_time = game.time

    if game.status == "GAME OVER":
        if symbol == key.ENTER:
            game.reset()

    if game.status == "REINICIAR":
        if symbol == key.ENTER:
            game.reset()
Example #14
    def init_exp_memory(self, size):
        if size > self.memory_size:
            size = self.memory_size

        game = self.get_game()
        self.exp_memory.add(game.get_state(), 0, 0, 0)
        for i in range(size):
            action = 0
            if np.random.rand() < self.epsilon:
                action = np.random.randint(0, self.num_actions)
            else:
                action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            state = game.get_state()
            self.exp_memory.add(state, action, reward, is_terminal)
            if is_terminal:
                game.reset()
                self.exp_memory.add(game.get_state(), 0, 0, 0)
Example #15
def run():
    try:
        # Init the game
        parser.init()
        # Check for editor
        if (parser.args.editor):
            os.system("/usr/share/make-snake/snake-editor/__main__.py")
            sys.exit(0)
        graphics.init()
        theme.init()
        stage.init()
        game.reset()

        # Start the game
        gameloop.start()

    except KeyboardInterrupt:
        exit()
Example #16
def run():
    try:
        # Init the game
        parser.init()
        # Check for editor
        if (parser.args.editor):
            os.system("/usr/share/make-snake/snake-editor/__main__.py")
            sys.exit(0)
        graphics.init()
        theme.init()
        stage.init()
        game.reset()

        # Start the game
        gameloop.start()

    except KeyboardInterrupt:
        exit()
Example #17
async def unregister(cid):
    clients.pop(cid, None)
    await notify_clients()
    if cid in game.players:
        data = game.reset()
        await notify_public_message(data)
        data['tag'] = 'public'
        data['text'] = 'a player quit, game reset'
        await notify_public_message(data)
Example #18
    def eval(self, num_steps):
        game = self.get_game()
        total_score = 0.0
        current_score = 0.0
        num_games = 1.0
        max_score = 0.0
        for i in range(num_steps):
            action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            reward = self.renormalize_reward(reward)
            current_score += reward
            total_score += reward
            if is_terminal:
                game.reset()
                if i < (num_steps - 1):
                    num_games += 1
                    if current_score > max_score:
                        max_score = current_score
                    current_score = 0

        average = total_score / num_games

        return total_score, num_games, average, max_score
Example #19
    def find_max_games(self, num_steps, path, score_threshold):
        image_id = 0
        game = self.get_game()
        frames = []
        frames.append((np.copy(game.get_state()), 0.0))
        max_game_score = 0
        current_game_score = 0.0
        for i in range(num_steps):
            if i % (num_steps // 10) == 0:
                print("At step {}".format(i))
            action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            reward = self.renormalize_reward(reward)
            current_game_score += reward
            frames.append((np.copy(game.get_state()), current_game_score))
            if is_terminal:
                game.reset()
                if current_game_score > max_game_score:
                    max_game_score = current_game_score

                if current_game_score > score_threshold:
                    print("Saving images...")
                    for frame in frames:
                        self.save_image(frame[0],
                                        path,
                                        image_id,
                                        0,
                                        0,
                                        0,
                                        score=frame[1])
                        image_id += 1

                frames = []
                frames.append((np.copy(game.get_state()), 0.0))
                current_game_score = 0.0

        print("Max score: {}".format(max_game_score))
Example #20
    def eval_with_images(self, num_steps, path):
        image_id = 0
        game = self.get_game()
        self.save_image(game.get_state(), path, image_id, 0, 0, 0, 0.0)
        total_score = 0
        games_finished = 0
        max_game_score = 0
        current_game_score = 0.0
        for i in range(num_steps):
            image_id += 1
            action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            reward = self.renormalize_reward(reward)
            total_score += reward
            current_game_score += reward
            self.save_image(game.get_state(),
                            path,
                            image_id,
                            action,
                            reward,
                            is_terminal,
                            score=current_game_score)
            if is_terminal:
                game.reset()
                games_finished += 1
                if current_game_score > max_game_score:
                    max_game_score = current_game_score
                current_game_score = 0.0
                self.save_image(game.get_state(),
                                path,
                                image_id,
                                action,
                                reward,
                                is_terminal,
                                score=current_game_score)

        print("Max score: {}".format(max_game_score))
Example #21
def train(num_episodes, save_rate=0, starting_episode=0):
    global f
    import time

    if starting_episode > 0:
        model = 'models/tetris_policy_' + str(starting_episode) + '.pth'
        policy.load_state_dict(torch.load(model))

    start_time = time.time()
    total_time = 0

    running_reward = 1
    episode = starting_episode
    while episode != num_episodes:
        state = game.reset()  # Reset environment and record the starting state
        f = True

        game_reward = 0

        for _ in range(max_time):
            action = select_action(state)
            f = False
            # Step through environment using chosen action
            state, reward, done = game.step(action.item())

            # Save reward
            policy.reward_episode.append(reward)
            game_reward += reward
            if done:
                break

        # Used to determine when the environment is solved.
        running_reward = (running_reward * 0.99) + (game_reward * 0.01)

        update_policy()

        if episode % 50 == 0:
            cur_time = time.time()
            total_time += cur_time - start_time
            start_time = cur_time
            print(
                'Episode {}\tLast reward: {:5d}\tAverage reward: {:.2f}\tTime: {:.2f}'
                .format(episode, game_reward, running_reward, total_time))

        if save_rate != 0 and (episode + 1) % save_rate == 0:
            PATH = 'models/tetris_policy_' + str(episode + 1) + '.pth'
            torch.save(policy.state_dict(), PATH)

        episode += 1
Example #22
File: main.py Project: pqhuy98/DQN
 while True:
     step += 1
     x = random()
     if (x < explore_rate):
         action = rdi(0, game.actions)
     else:
         current_frames = np.array([
             np.concatenate(
                 (exp.get_last(),
                  [gp.transform(game.current_state, height, width)]))
         ])
         action = net.choose_action(current_frames, i %
                                    play_interval == play_interval - 1)[0]
     for j in range(1):  #+(i%play_interval!=play_interval-1)) :
         exp.update()
         state = game.next_frame(action, False)
     exp_batch = exp.get(xp_nb_batch)
     if (i % play_interval == play_interval - 1):
         print "--"
         gp.show_rgb(state[2])
     else:
         if (exp_batch[0].shape[0] > 0):
             net.learn(exp_batch[0], exp_batch[1], (state[0] == 0))
     if (state[0] == 0):
         game.reset()
         exp.reset()
         break
 print "step =", step
 if (i % save_interval == save_interval - 1):
     net.save("version01")
     print "Model saved"
Example #23
def reset():
    game.reset()
    graphics.drawGame()
Example #24
def task_three():
    print("Task Three")
    game.file_exists()
    game.start()
    game.reset()
Example #25
    start_move_time = time.time()
    while game.winner is None:
        if move_nr % 2 == 1:
            move = alpha_beta.rootAlphaBeta(game, 3, player1)
            color = player1.color
        else:
            move = alpha_beta.rootAlphaBeta(game, 2, player2)
            color = player2.color
        valid_move = game.doMove(color, move)
        if valid_move:
            move_nr += 1
        # player = game.getOtherPlayer(player)
        end_move_time = time.time()
        print(game.getPlayerString(color), move, " - Time used:", end_move_time - start_move_time)
        start_move_time = time.time()
        # print(game.getCorrectBoardArray())

    end_time = time.time() - start_time
    print(game)
    print("Moves:", len(game.moves), end_time)
    game_nr += 1
    if game.winner == game.WHITE:
        new_ai_win += 1
    else:
        new_ai_loose += 1
    game.reset()


print(new_ai_win, new_ai_loose)
print(time.time()-super_start_time)
Example #26
async def startBJ(ctx):
	"""Start a game of blackjack (Note: will reset an ongoing game!)"""
	global game
	
	game.reset(4)
	await ctx.send("Starting Blackjack!  Who'd like to play?\nRespond with the __!playing__ command to join.\n__!out__ will get you out of the game.")
            print "Playing randomly with prob", epsilon_greedy
        ep_index = ep_index + 1

        if ep_index > 1000:
            if avg_nonzero_reward > -0.1:
                print "Min reward over last 500 is", avg_nonzero_reward, "> -0.1, so finished training"
                return tf_sess, tf_output_layer


tf_sess, tf_output_layer = qlearning()

# Now observe the game with the learned parameters
game = Game()
game.set_render_or_not(True)
last_nonzero_rewards = []
current_state = game.reset()
print "Obs", current_state
for i in range(1000):
    action = compute_action(tf_sess, tf_output_layer, 1.0, current_state, 0.0)
    #action = np.random.randint(NUM_ACTIONS)
    obs, reward, terminal = game.step_environment(action)
    current_state = obs
    last_nonzero_rewards.append(reward)
    last_nonzero_rewards = last_nonzero_rewards[-500:]
    print obs, reward, terminal
    print "Average rewards:", np.mean(last_nonzero_rewards)

    if terminal:
        #print "Resetting"
        current_state = game.reset()
        #print "Obs", current_state
Example #28

if __name__ == "__main__":
    state_size = cf.stateSize()
    print(state_size)
    action_size = cf.actionSize()
    print(action_size)
    playerOne = DQNAgent(state_size, action_size)
    playerTwo = DQNAgent(state_size, action_size)
    playerOne.load("./save/cfOne2000.h5")
    playerTwo.load("./save/cfTwo2000.h5")
    done = 0
    batch_size = 42

    for e in range(EPISODES):
        cf.reset()
        state = cf.board
        state = np.reshape(state, [1, state_size])
        time = 0  # time is used just to count frames as a measurement of how long the ai lasted
        while True:
            sleep(5)
            time += 1
            actionOne = playerOne.act(state)
            moveOne = cf.dropTile(actionOne, 1)
            cf.render()
            print("/////////////////")
            currentBoard = np.reshape(cf.board, [1, state_size])
            actionTwo = playerTwo.act(currentBoard)
            moveTwo = cf.dropTile(actionTwo, -1)
            cf.render()
            print("/////////////////")
Example #29
 def post(self):
     game.reset()
     return jsonify(reset=True)
Example #30
def run_main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_dir', type=str, help='Training directory')
    parser.add_argument(
        '--tmp_dir',
        type=str,
        default='/tmp',
        help=
        'Temporary directory to store model checkpoint for restoration process'
    )
    parser.add_argument('--game',
                        type=str,
                        default='Breakout-v0',
                        help='Game name')
    parser.add_argument('--dump_model',
                        action='store_true',
                        help='Dump model into checkpoint/graph and exit')
    parser.add_argument(
        '--remote_addr',
        default='localhost:5001',
        type=str,
        help='Remote service address to connect to for inference')
    parser.add_argument('--logfile', type=str, help='Logfile')
    parser.add_argument('--player_id',
                        default=0,
                        type=int,
                        help='Player ID used to index history entries')
    parser.add_argument('--num_episodes',
                        default=10000,
                        type=int,
                        help='Number of episodes to run')

    FLAGS = parser.parse_args()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['NVIDIA_VISIBLE_DEVICES'] = ''
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    tf.logging.set_verbosity(tf.logging.ERROR)

    logging.basicConfig(filename=FLAGS.logfile,
                        filemode='a',
                        level=logging.INFO,
                        format='%(asctime)s.%(msecs)03d: %(message)s',
                        datefmt='%d/%m/%y %H:%M:%S')

    config = {
        'game': FLAGS.game,
        'tmp_dir': FLAGS.tmp_dir,
        'state_stack_size': 1,
        'remote_addr': FLAGS.remote_addr,
        'train_dir': FLAGS.train_dir,
        'input_map_shape': [84, 84, 1],
        'input_params_shape': [4],
        'owner_id': FLAGS.player_id,
        'env_id': 0,
    }

    if FLAGS.dump_model:
        env = gym.make(config['game'])
        config['num_actions'] = env.action_space.n

        import model

        m = model.create_model(config)
        m.save_checkpoint()
        exit(0)

    game = GameWrapper(config)

    episode = 0
    episode_rewards = []

    while FLAGS.num_episodes < 0 or episode < FLAGS.num_episodes:

        game.prev_st = game.reset()
        game.prev_model_st = halite_model_pb2.State(
            state=game.prev_st.state.tobytes(),
            params=game.prev_st.params.tobytes())
        done = False
        rewards = []

        while not done:
            done = game.loop_body()
            rewards.append(game.prev_reward)

        er = np.sum(rewards)
        episode_rewards.append(er)
        if len(episode_rewards) > 100:
            episode_rewards = episode_rewards[1:]

        logging.info(
            '{}: last train_step: {}, steps: {}, episode reward: {}, mean episode reward: {:.1f}, std: {:.1f}'
            .format(episode, game.train_step, len(rewards), er,
                    np.mean(episode_rewards), np.std(episode_rewards)))
        episode += 1
Example #31
def qlearning():
    tf.reset_default_graph()
    tf_sess = tf.Session()

    tf_output_layer, l2_reg = create_network((1 + NUM_PLAYERS) * 2, 20,
                                             NUM_ACTIONS)

    tf_action = tf.placeholder("float", [None, NUM_ACTIONS], name='action')

    tf_target = tf.placeholder("float", [None], name='target')

    tf_q_for_action = tf.reduce_sum(tf.mul(tf_output_layer, tf_action),
                                    reduction_indices=1)

    with tf.name_scope('cost'):
        #reg_losses = [tf.nn.l2_loss(tf.get_variable('layer1/weights'))]
        tf_cost = tf.reduce_mean(tf.square(tf_target - tf_q_for_action)) + \
        l2_reg * L2_REG
        #+ L2_REG * sum(reg_losses)
        tf.scalar_summary('cost', tf_cost)
        tf.scalar_summary('l2_reg', l2_reg)
        #tf.scalar_summary('reg_loss', sum(reg_losses))

    with tf.name_scope('avg_reward'):
        tf_rewards = tf.placeholder("float", [None], name='rewards')
        tf_avg_reward = tf.reduce_mean(tf_rewards)
        tf.scalar_summary('avg_reward', tf_avg_reward)

    with tf.name_scope('train_op'):
        tf_train_operation = \
            tf.train.AdamOptimizer(INITIAL_LEARNING_RATE).minimize(tf_cost)

    merged = tf.merge_all_summaries()
    # Give this run of the program an identifier
    identifier = str(time.gmtime()[0:5])
    identifier = identifier.replace('(', '').replace(')', '')
    identifier = identifier.replace(' ', '-').replace(',', '')
    train_writer = tf.train.SummaryWriter('train-' + identifier, tf_sess.graph)

    tf_sess.run(tf.initialize_all_variables())

    epsilon_greedy = INITIAL_EPSILON_GREEDY

    transitions = deque()

    episode_lengths = []

    ep_index = 0
    loss = None

    game = Game()
    game.set_render_or_not(False)

    current_state = game.reset()
    keep_prob = 0.5

    last_nonzero_rewards = []

    t_step = 0

    # Record transitions
    while True:
        # Run an episode
        action = compute_action(tf_sess, tf_output_layer, keep_prob,
                                current_state, epsilon_greedy)
        obs, reward, terminal = game.step_environment(action)
        #print "Observation", obs

        next_state = obs
        last_nonzero_rewards.append(reward)
        last_nonzero_rewards = last_nonzero_rewards[-500:]

        transitions.append({
            'state': current_state,
            'next_state': next_state,
            'action': action,
            'reward': reward,
            'terminal': terminal
        })

        if terminal:
            current_state = game.reset()
        else:
            current_state = next_state

        if len(transitions) > MINI_BATCH_SIZE:
            summary = train(tf_sess, tf_train_operation, tf_output_layer,
                            merged, transitions, last_nonzero_rewards[-500:],
                            KEEP_PROB)
            if t_step % 100 == 0:
                train_writer.add_summary(summary, t_step)
            t_step = t_step + 1
        epsilon_greedy = epsilon_greedy - \
        (INITIAL_EPSILON_GREEDY-FINAL_EPSILON_GREEDY) / float(EPSILON_STEPS)
        epsilon_greedy = max(FINAL_EPSILON_GREEDY, epsilon_greedy)

        avg_nonzero_reward = np.mean(last_nonzero_rewards)
        if (ep_index % 100) == 0:
            print "Average nonzero reward", avg_nonzero_reward
            print "Playing randomly with prob", epsilon_greedy
        ep_index = ep_index + 1

        if ep_index > 1000:
            if avg_nonzero_reward > -0.1:
                print "Min reward over last 500 is", avg_nonzero_reward, "> -0.1, so finished training"
                return tf_sess, tf_output_layer
Example #32
    def train(self):
        if self.model_loaded:
            self.init_exp_memory(self.exp_memory_start_size)
        else:
            self.init_random_exp_memory(self.exp_memory_start_size)

        total_reward = 0.0
        games_played = 1

        game = self.get_game()
        self.exp_memory.add(game.get_state(), 0, 0, 0)

        while self.curr_step < self.max_steps:
            #play one game step according to epsilon-greedy policy
            action = 0
            if np.random.rand() < self.epsilon:
                action = np.random.randint(0, self.num_actions)
            else:
                action = self.qlearner.compute_action(game.get_state())[0]

            reward, is_terminal = game.execute_action(action)
            self.exp_memory.add(game.get_state(), action, reward, is_terminal)
            if is_terminal:
                game.reset()
                self.exp_memory.add(game.get_state(), 0, 0, 0)
                games_played += 1

            total_reward += self.renormalize_reward(reward)

            #compute next epsilon
            self.epsilon = np.maximum(self.epsilon_min,
                                      self.epsilon - self.epsilon_step)
            self.memory_beta = np.minimum(
                self.memory_beta_end, self.memory_beta + self.memory_beta_step)

            if self.curr_step % self.update_freq == 0:
                #sample a batch of transitions from experience memory
                s, a, r, s2, t, indices, p_values = self.exp_memory.sample(
                    self.batch_size)

                #output tensorboard summaries
                write_summary = False
                if (self.tensorboard_log_freq > 0) and (
                        self.curr_step % self.tensorboard_log_freq == 0):
                    write_summary = True

                #beta is divided by 2 here because squared error loss squares beta
                _, _, td = self.qlearner.train_step(
                    s,
                    a,
                    r,
                    s2,
                    t,
                    p_values,
                    self.memory_beta / 2.0,
                    write_summary=write_summary)
                self.exp_memory.update_p(indices, td)

            #update target network
            if self.target_network_update_mode == "soft":
                if self.curr_step % self.update_freq == 0:
                    self.qlearner.update_target_network()
            else:
                if self.curr_step % self.target_network_update_freq == 0:
                    self.qlearner.update_target_network()

            #output current training status
            if self.curr_step % self.output_freq == 0:
                average_reward = total_reward / games_played
                total_reward = 0
                games_played = 1
                print("step: {}  epsilon: {}  average reward per game: {}".
                      format(self.curr_step, self.epsilon, average_reward))

            #evaluate current target network and save model if average score per game has improved
            if (self.curr_step % self.eval_freq == 0):
                score, num_games, average, max_score = self.eval(
                    self.eval_steps)
                print("Evaluating model with {} steps:".format(
                    self.eval_steps))
                print(
                    "Total score: {}  Games: {}  Average: {}  Max: {}".format(
                        score, num_games, average, max_score))
                if average >= self.best_average_score:
                    print("Improved average score")
                    print("Saving model...")
                    self.save()
                    self.best_average_score = average
                #add average score to tensorboard
                summary = tf.Summary()
                summary.value.add(tag='average_score', simple_value=average)
                summary.value.add(tag='max_score', simple_value=max_score)
                self.qlearner.summary_writer.add_summary(
                    summary, self.curr_step)

            self.curr_step += 1
Example #33
# ========================================================================
#
#                           Initialization
#
# ========================================================================

# which model to run
# the full list of models is contained in the models folder
# training the network will generate models
model = 'models/tetris_policy_470000.pth'

# Loading the model and resetting the game state
policy = train.Policy()
policy.load_state_dict(torch.load(model))

state = game.reset()
score = 0
high_score = 0
last_move = 0

# Window constants
GAME_WIDTH = 100
UI_WIDTH = 150
GAME_HEIGHT = 180
SQUARE_WIDTH = 10
SQUARE_HEIGHT = 10

SCREEN_TITLE = "Tetris"
FONT_SIZE = 11
KEY_SIZE = 15