Example No. 1
    def evaluate_moves_by_mlp(self, moves, my_fields, other_fields):
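        # Score each candidate (die, move) pair by applying it to a copy of the
        # board and feeding the resulting position to the MLP; keep the best one.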
        max_move_index = 0
        max_move_value = 0
        for idx, move_die in enumerate(moves):
            die, move = move_die
            my_fields_after = copy.copy(my_fields)
            other_fields_after = copy.copy(other_fields)
            try:
                Game.apply_move(my_fields_after, other_fields_after, move)
                inputs = Board.prepare_any_inputs(my_fields_after,
                                                  other_fields_after)
                outputs = self.mlp.run_input(inputs)
                if outputs[0] > max_move_value:
                    max_move_value = outputs[0]
                    max_move_index = idx
            except Exception:
                # Skip moves that cannot legally be applied to the board copy.
                continue

        return moves[max_move_index]
Example No. 2
def generate_game_code():
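    # Keep drawing 6-character codes (3 uppercase letters + 3 digits)
    # until one is found that is not already registered in GAMES.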
    valid = False
    while not valid:
        code = ""
        for i in range(3):
            code += chr(random.randint(65, 90))  # uppercase letter A-Z
        for i in range(3):
            code += chr(random.randint(48, 57))  # digit 0-9
        valid = code not in GAMES
    GAMES[code] = Game()
    return code
Example No. 3
    def train(self):
        tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
        summary_writer = tf.summary.FileWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

        # the agent plays against itself, making the best move for each player
        #players = [TDAgent(Game.TOKENS[0], self,p=np.random.rand()/10), TDAgent(Game.TOKENS[1], self,p=np.random.rand()/10)]
        #players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]
        validation_interval = 10000
        episodes = 200000
        t = trange(episodes, desc='Training', leave=True)
        for episode in t:
            players = [TDAgent(Game.TOKENS[0], self, p=np.random.rand() / 3),
                       TDAgent(Game.TOKENS[1], self, p=np.random.rand() / 3)]
            if episode != 0 and episode % validation_interval == 0:
                self.test(episodes=200)
                np.random.seed()
                #self.test(episodes=200,mode=1)
            t.refresh()
            game = Game.new()
            player_num = random.randint(0, 1)
            if player_num == 0:
                game.reverse()
            x = game.extract_features(players[player_num].player)
            
            
            #print(self.xy.eval())
            
            game_step = 0
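            # TD update after every ply: move the value estimate for the current
            # features x toward the network's value of the next position.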
            while not game.is_over():
                game.next_step(players[player_num], player_num)
                player_num = (player_num + 1) % 2

                x_next = game.extract_features(players[player_num].player)
                V_next = self.get_output(x_next)
                self.sess.run(self.train_op, feed_dict={ self.x: x, self.V_next: V_next })

                x = x_next
                game_step += 1

            winner = game.winner()

            _, global_step, summaries, _ = self.sess.run([
                self.train_op,
                self.global_step,
                self.summaries_op,
                self.reset_op
            ], feed_dict={ self.x: x, self.V_next: np.array([[winner]], dtype='float') })
            summary_writer.add_summary(summaries, global_step=global_step)

            #tqdm.write("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))
            self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        summary_writer.close()
Example No. 4
    def test(self, episodes=100, draw=False):
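        # Evaluate the TD agent against a random baseline and report the
        # running win percentage after every game.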
        players = [TDAgent(Game.TOKENS[0], self), RandomAgent(Game.TOKENS[1])]
        winners = [0, 0]
        for episode in range(episodes):
            game = Game.new()

            winner = game.play(players, draw=draw)
            winners[winner] += 1

            winners_total = sum(winners)
            print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (episode, \
                players[0].name, players[0].player, \
                players[1].name, players[1].player, \
                winners[0], winners[1], winners_total, \
                (winners[0] / winners_total) * 100.0))
Example No. 5
    def train(self):
        with tf.device('/gpu:0'):
            tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
            summary_writer = tf.summary.FileWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

            # the agent plays against itself, making the best move for each player
            players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

            validation_interval = 1000
            episodes = 5000

            for episode in range(episodes):
                if episode != 0 and episode % validation_interval == 0:
                    self.test(episodes=100)

                game = Game.new()
                player_num = random.randint(0, 1)

                x = game.extract_features(players[player_num].player)

                game_step = 0
                while not game.is_over():
                    game.next_step(players[player_num], player_num)
                    player_num = (player_num + 1) % 2

                    x_next = game.extract_features(players[player_num].player)
                    V_next = self.get_output(x_next)
                    self.sess.run(self.train_op, feed_dict={ self.x: x, self.V_next: V_next })

                    x = x_next
                    game_step += 1

                winner = game.winner()

                _, global_step, summaries, _ = self.sess.run([
                    self.train_op,
                    self.global_step,
                    self.summaries_op,
                    self.reset_op
                ], feed_dict={ self.x: x, self.V_next: np.array([[winner]], dtype='float') })
                summary_writer.add_summary(summaries, global_step=global_step)

                print("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))
                self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

            summary_writer.close()

            self.test(episodes=1000)
Example No. 6
    def train(self):
        tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
        summary_writer = tf.train.SummaryWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

        # the agent plays against itself, making the best move for each player
        players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

        validation_interval = 1000
        episodes = 5000

        for episode in range(episodes):
            if episode != 0 and episode % validation_interval == 0:
                self.test(episodes=100)

            game = Game.new()
            player_num = random.randint(0, 1)

            x = game.extract_features(players[player_num].player)

            game_step = 0
            while not game.is_over():
                game.next_step(players[player_num], player_num)
                player_num = (player_num + 1) % 2

                x_next = game.extract_features(players[player_num].player)
                V_next = self.get_output(x_next)
                self.sess.run(self.train_op, feed_dict={ self.x: x, self.V_next: V_next })

                x = x_next
                game_step += 1

            winner = game.winner()

            _, global_step, summaries, _ = self.sess.run([
                self.train_op,
                self.global_step,
                self.summaries_op,
                self.reset_op
            ], feed_dict={ self.x: x, self.V_next: np.array([[winner]], dtype='float') })
            summary_writer.add_summary(summaries, global_step=global_step)

            print("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))
            self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

        summary_writer.close()

        self.test(episodes=1000)
Example No. 7
def main():
    #random.seed(42)
    board = Board()
    game = Game(board)
    agent = RandomAgent(board)
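    # Turns alternate: the human is prompted for moves until a legal set is
    # entered, then the random agent plays its own roll.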
    while not game.is_finished():
        dice = game.roll_dice()
        board.print()
        allowed_moves_made = False
        while allowed_moves_made is False:
            human_moves = present_dice_to_human_and_ask_move(dice)
            try:
                allowed_moves_made = game.apply(game.PLAYER1, human_moves)
            except Exception as e:
                print(e)
                allowed_moves_made = False
        dice = game.roll_dice()
        ai_moves = agent.move(dice)
        print("Dice for AI were {}. Resulting moves: {}".format(dice, ", ".join([str(move) for move in ai_moves])))
        game.apply(game.PLAYER2, ai_moves)
Example No. 8
    def test(self, episodes=100, draw=False, save=None):
        players = [
            TDAgent(Game.TOKENS[0], self),
            TDAgent(Game.TOKENS[1], self)
        ]
        winners = [0, 0]
        for episode in range(episodes):
            game = Game.new()

            winner = game.play(players, draw=draw)
            if save:
                game.save_tmg(os.path.join(save, str(episode) + '.tmg'))
            winners[winner] += 1

            winners_total = sum(winners)
            print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (episode, \
                players[0].name, players[0].player, \
                players[1].name, players[1].player, \
                winners[0], winners[1], winners_total, \
                (winners[0] / winners_total) * 100.0))
Example No. 9
    def test(self, episodes=100, draw=False, mode=0):
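        # mode 0: TD agent (player 0) vs. Today_bot; mode 1: sides swapped;
        # mode 3: TD agent self-play.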
        if mode == 0:
            players = [TDAgent(Game.TOKENS[0], self), Today_bot(Game.TOKENS[1])]
        elif mode == 1:
            players = [Today_bot(Game.TOKENS[0]), TDAgent(Game.TOKENS[1], self)]
        elif mode == 3:
            players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]
        else:
            raise ValueError("Unsupported test mode: {}".format(mode))
        winners = [0, 0]
        for episode in range(episodes):
            np.random.seed(episode)
            game = Game.new()

            winner = game.play(players, draw=draw)
            winners[winner] += 1

            winners_total = sum(winners)
        if mode < 3:
            print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (episode, \
                players[0].name, players[0].player, \
                players[1].name, players[1].player, \
                winners[0], winners[1], winners_total, \
                (winners[mode] / winners_total) * 100.0))
            if (winners[mode] / winners_total) * 100.0 > self.max_wr:
                self.max_wr = (winners[mode] / winners_total) * 100.0
                w1 = self.l1_W.eval()
                b1 = self.l1_b.eval()
                w2 = self.l2_W.eval()
                b2 = self.l2_b.eval()
                np.savetxt("w1.txt", w1)
                np.savetxt("w2.txt", w2)
                np.savetxt("b1.txt", b1)
                np.savetxt("b2.txt", b2)
                with open("max_wr.txt", "w") as text_file:
                    text_file.write(str(self.max_wr))
        else:
            print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (episode, \
                players[0].name, players[0].player, \
                players[1].name, players[1].player, \
                winners[0], winners[1], winners_total, \
                (winners[0] / winners_total) * 100.0))
Example No. 10
    def play(self):
        game = Game.new()
        game.play([TDAgent(Game.TOKENS[0], self), Today_bot(Game.TOKENS[1])], draw=True)
Example No. 11
def run_game_loop(board, agent1, agent2, do_print=False):
    game = Game(board)
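    # Each iteration is one full round: an agent calls learn(), rolls, and moves,
    # then the opponent does the same; after the game the final learn() calls
    # receive the result as a target vector.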
    while not game.is_finished():
        agent1.learn()
        #agent1.backprop()
        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI1 were {}.".format(dice))
        ai_moves = agent1.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join(
                [str(move) for move in ai_moves])))
        game.apply(game.PLAYER1, ai_moves)

        if game.is_finished():
            break

        agent2.learn()
        #agent2.backprop()
        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI2 were {}.".format(dice))
        ai_moves = agent2.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join(
                [str(move) for move in ai_moves])))
        game.apply(game.PLAYER2, ai_moves)

    winner = game.get_winner()
    if winner > 0:
        agent1.learn(np.array([1.0, 0.0]))
        agent2.learn(np.array([0.0, 1.0]))
        #agent1.backprop(np.array([1.0, 0.0]))
        #agent2.backprop(np.array([0.0, 1.0]))
    else:
        agent1.learn(np.array([0.0, 1.0]))
        agent2.learn(np.array([1.0, 0.0]))
        #agent1.backprop(np.array([0.0, 1.0]))
        #agent2.backprop(np.array([1.0, 0.0]))

    return winner, game.get_num_moves()
Example No. 12
    def move(self, dice):
        self.last_my_fields_before_move = copy.copy(self.my_fields)
        self.last_other_fields_before_move = copy.copy(self.other_fields)
        if dice[0] == dice[1]:
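            # Doubles: the same die value can be played up to four times, so
            # greedily pick the best MLP-scored move four times on board copies.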
            final_moves = []
            my_fields_after = copy.copy(self.my_fields)
            other_fields_after = copy.copy(self.other_fields)
            for _ in range(4):
                moves = self.generate_possible_moves(dice[0], my_fields_after,
                                                     other_fields_after)
                if len(moves) == 0:
                    return final_moves
                #print("Possible moves:\n{}".format(moves))
                used_die, best_move = self.evaluate_moves_by_mlp(
                    moves, my_fields_after, other_fields_after)
                #print("Best move: {}, {}".format(best_move, used_die))
                final_moves.append(best_move)
                try:
                    Game.apply_move(my_fields_after, other_fields_after,
                                    best_move)
                except Exception as e:
                    #print("dice {}, best_move {}".format(dice, best_move))
                    self.print_intermediate_board(my_fields_after,
                                                  other_fields_after)
                    raise e
                #self.print_intermediate_board(my_fields_after, other_fields_after)

            return final_moves
        else:
            final_moves = []
            moves = []
            moves.extend(
                self.generate_possible_moves(dice[0], self.my_fields,
                                             self.other_fields))
            moves.extend(
                self.generate_possible_moves(dice[1], self.my_fields,
                                             self.other_fields))
            #print("Possible moves:\n{}".format(moves))
            if len(moves) == 0:
                return final_moves
            used_die, best_move = self.evaluate_moves_by_mlp(
                moves, self.my_fields, self.other_fields)
            #print("Best move: {}, {}".format(best_move, used_die))
            final_moves.append(best_move)

            my_fields_after = copy.copy(self.my_fields)
            other_fields_after = copy.copy(self.other_fields)
            Game.apply_move(my_fields_after, other_fields_after, best_move)

            #self.print_intermediate_board(my_fields_after, other_fields_after)

            other_die = dice[0]
            if dice[0] == used_die:
                other_die = dice[1]
            moves = self.generate_possible_moves(other_die, my_fields_after,
                                                 other_fields_after)
            #print("Possible moves:\n{}".format(moves))
            if len(moves) > 0:
                used_die, best_move = self.evaluate_moves_by_mlp(
                    moves, my_fields_after, other_fields_after)
                #print("Best move: {}, {}".format(best_move, used_die))
                final_moves.append(best_move)
            return final_moves
Example No. 13
    pos[25] = len(game.bar_pieces['x'])
    pos[26] = len(game.off_pieces['x'])
    pos[27] = -len(game.off_pieces['o'])
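    # Encode each board point: positive counts for 'x' checkers, negative for 'o'.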
    for i, tup in enumerate(game.grid):
        if len(tup) == 0:
            pos[i + 1] = 0
        elif tup[0] == 'x':
            pos[i + 1] = len(tup)
        else:
            pos[i + 1] = -len(tup)
    return pos


if __name__ == '__main__':

    print(pubeval(False, [0] + [-2, 0, 0, 0, 0, 5] + [0, 3, 0, 0, 0, -5] +
                  [5, 0, 0, 0, -3, 0] + [-5, 0, 0, 0, 0, 2] + [0] + [0, 0]))

    from backgammon.game import Game

    g = Game.new()
    print(pubeval(False, game_to_pos(g)))

    actions = g.get_actions((5, 6), 'x', nodups=True)
    for a in sorted([str(foo) for foo in actions]):
        print(a)
    print()
    actions = g.get_actions((5, 6), 'o', nodups=True)
    for a in sorted([str(foo) for foo in actions]):
        print(a)
Example No. 14
from backgammon.game import Game, Board

# For testing purposes
g = Game()
g.initialize()
print(g)
g.play_(1, 0, 9)
print(g)
g.play_(-1, 7, 9)
print(g)
g.play_(-1, 7, 5)
print(g)
# g.play(1, 0, 11)
# print(g)
g.play_(-1, 5, 7)
print(g)
Example No. 15
def run_game_loop(board, agent1, agent2, do_print=False):
    game = Game(board)
    while not game.is_finished():
        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI1 were {}.".format(dice))
        ai_moves = agent1.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join(
                [str(move) for move in ai_moves])))
        game.apply(game.PLAYER1, ai_moves)

        if game.is_finished():
            break

        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI2 were {}.".format(dice))
        ai_moves = agent2.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join(
                [str(move) for move in ai_moves])))
        game.apply(game.PLAYER2, ai_moves)

    return game.get_winner()
Example No. 16
    def play(self):
        game = Game.new()
        game.play([TDAgent(Game.TOKENS[0], self), HumanAgent(Game.TOKENS[1])], draw=True)
Example No. 17
    def train(self, episodes=5000):
        tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
        summary_writer = tf.train.SummaryWriter('{0}{1}'.format(self.summary_path, int(time.time())),
                                                graph_def=self.sess.graph_def)

        # the agent plays against itself, making the best move for each player
        players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

        validation_interval = 1000
        report_freq = 10

        prev_time = time.time()
        prev_step = self.sess.run(self.global_step)
        plies_per_batch = 0
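        # Throughput bookkeeping: every report_freq episodes the loop below logs
        # global steps/sec, games/sec, and average plies per game as summaries.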

        for episode in range(episodes):
            if episode != 0 and episode % validation_interval == 0:
                self.test(episodes=100)

            game = Game.new()
            player_num = random.randint(0, 1)

            x = game.extract_features(players[player_num].player)

            game_step = 0
            while not game.is_over():
                game.next_step(players[player_num], player_num)
                player_num = (player_num + 1) % 2

                x_next = game.extract_features(players[player_num].player)
                V_next = self.get_output(x_next)
                self.sess.run(self.train_op, feed_dict={ self.x: x, self.V_next: V_next })

                x = x_next
                game_step += 1

            winner = game.winner()

            _, global_step, summaries, _ = self.sess.run([
                self.train_op,
                self.global_step,
                self.summaries_op,
                self.reset_op
            ], feed_dict={ self.x: x, self.V_next: np.array([[winner]], dtype='float') })

            print("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))
            plies_per_batch += game_step
            if episode != 0 and episode % report_freq == 0:
                now = time.time()
                elapsed_time = now - prev_time
                steps_per_sec = (global_step - prev_step) / elapsed_time
                games_per_sec = report_freq / elapsed_time
                plies_per_game = plies_per_batch / report_freq
                print('e=%.2f sps=%.2f gps=%.2f ppg=%.1f global=%d prev=%d' % (elapsed_time, steps_per_sec, games_per_sec, plies_per_game, global_step, prev_step))
                
                summary_writer.add_summary(summaries, global_step=global_step)
                
                s1 = tf.Summary(value=[tf.Summary.Value(tag='rate/global_steps_sec',
                                                        simple_value=steps_per_sec)])
                summary_writer.add_summary(s1, global_step)
                
                s2 = tf.Summary(value=[tf.Summary.Value(tag='rate/games_sec',
                                                        simple_value=games_per_sec)])
                summary_writer.add_summary(s2, global_step)
                
                s3 = tf.Summary(value=[tf.Summary.Value(tag='rate/plies_per_game',
                                                        simple_value=plies_per_game)])
                summary_writer.add_summary(s3, global_step)
                
                self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)
                prev_time = now
                prev_step = global_step
                plies_per_batch = 0

        summary_writer.close()

        self.test(episodes=1000)
Example No. 18
    def random_selfplay(self):
        players = [RandomAgent(Game.TOKENS[0]), RandomAgent(Game.TOKENS[1])]
        game = Game.new()
        game.SLEEP = 0
        winner = game.play(players, draw=True)
Example No. 19
    def play(self, ts=False):
        game = Game.new()
        game.play([TDAgent(Game.TOKENS[0], self),
                   HumanAgent(Game.TOKENS[1])],
                  draw=True,
                  ts=ts)
Example No. 20
        elif tup[0] == 'x':
            pos[i + 1] = len(tup)
        else:
            pos[i + 1] = -len(tup)
    return pos
    

if __name__ == '__main__':

    print(pubeval(False, [0] +
                  [-2,  0,  0,  0,  0,  5] +
                  [ 0,  3,  0,  0,  0, -5] +
                  [ 5,  0,  0,  0, -3,  0] +
                  [-5,  0,  0,  0,  0,  2] +
                  [ 0] +
                  [ 0,  0]))

    from backgammon.game import Game

    g = Game.new()
    print(pubeval(False, game_to_pos(g)))

    actions = g.get_actions((5, 6), 'x', nodups=True)
    for a in sorted([str(foo) for foo in actions]):
        print(a)
    print()
    actions = g.get_actions((5, 6), 'o', nodups=True)
    for a in sorted([str(foo) for foo in actions]):
        print(a)
Example No. 21
    def train(self):
        tf.train.write_graph(self.sess.graph_def,
                             self.model_path,
                             'td_gammon.pb',
                             as_text=False)
        summary_writer = tf.summary.FileWriter(
            '{0}{1}'.format(self.summary_path, int(time.time())),
            self.sess.graph_def)

        # the agent plays against itself, making the best move for each player
        players = [
            TDAgent(Game.TOKENS[0], self),
            TDAgent(Game.TOKENS[1], self)
        ]

        #validation_interval = 1000
        #episodes = 5000
        validation_interval = 500
        episodes = 5000

        train_start_ts = time.time()
        for episode in range(episodes):
            start_ts = time.time()
            if episode != 0 and episode % validation_interval == 0:
                print('Episode:', episode)
                write('Episode: %d' % episode)
                self.test(episodes=100)
            game = Game.new()
            player_num = random.randint(0, 1)

            x = game.extract_features(players[player_num].player)

            game_step = 0
            while not game.is_over():
                game.next_step(players[player_num], player_num)
                player_num = (player_num + 1) % 2

                x_next = game.extract_features(players[player_num].player)
                V_next = self.get_output(x_next)
                self.sess.run(self.train_op,
                              feed_dict={
                                  self.x: x,
                                  self.V_next: V_next
                              })

                x = x_next
                game_step += 1

            winner = game.winner()

            _, global_step, summaries, _ = self.sess.run([
                self.train_op,
                self.global_step,
                self.summaries_op,
                self.reset_op,
            ], feed_dict={self.x: x,
                          self.V_next: np.array([[winner]], dtype='float')})
            summary_writer.add_summary(summaries, global_step=global_step)

            end_ts = time.time()
            print("%.2f - Game %d/%d (Winner: %s) in %d turns (%.2f secs)" %
                  (self.k, episode, episodes, players[winner].player,
                   game_step, end_ts - start_ts))
            """if episode in [9, 99, 999, 9999, 99999]:
                print("%d games avg time: %.2f secs" % (episode+1, (end_ts - train_start_ts) / (episode+1)))
            """
            self.saver.save(self.sess,
                            self.checkpoint_path + 'checkpoint',
                            global_step=global_step)

        summary_writer.close()
        write('Episode: 5000')
        self.test(episodes=100)