def evaluate_moves_by_mlp(self, moves, my_fields, other_fields):
    max_move_index = 0
    max_move_value = 0
    for idx, move_die in enumerate(moves):
        die, move = move_die
        my_fields_after = copy.copy(my_fields)
        other_fields_after = copy.copy(other_fields)
        try:
            Game.apply_move(my_fields_after, other_fields_after, move)
            inputs = Board.prepare_any_inputs(my_fields_after, other_fields_after)
            outputs = self.mlp.run_input(inputs)
            if outputs[0] > max_move_value:
                max_move_value = outputs[0]  # remember the best value seen so far
                max_move_index = idx
        except Exception:
            continue
    return moves[max_move_index]
def generate_game_code():
    valid = False
    while not valid:
        code = ""
        for i in range(3):
            code += chr(random.randint(65, 90))  # choose an uppercase letter
        for i in range(3):
            code += chr(random.randint(48, 57))  # choose a digit
        valid = code not in GAMES
    GAMES[code] = Game()
    return code
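# Hedged side note on the code format used by generate_game_code(): three uppercase
# letters followed by three digits gives 26**3 * 10**3 distinct codes, so collisions
# with the GAMES registry stay rare until many games are active. The string-module
# variant below is only an equivalent, self-contained illustration, not the project's API.
import random
import string

def sample_code():
    letters = "".join(random.choice(string.ascii_uppercase) for _ in range(3))
    digits = "".join(random.choice(string.digits) for _ in range(3))
    return letters + digits

print(26 ** 3 * 10 ** 3)  # 17,576,000 possible codes
print(sample_code())      # e.g. 'QKT402'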
def train(self):
    tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
    summary_writer = tf.summary.FileWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

    # the agent plays against itself, making the best move for each player
    validation_interval = 10000
    episodes = 200000

    t = trange(episodes, desc='Bar desc', leave=True)
    for episode in t:
        # resample the exploration parameter for both self-play agents each episode
        players = [TDAgent(Game.TOKENS[0], self, p=np.random.rand() / 3),
                   TDAgent(Game.TOKENS[1], self, p=np.random.rand() / 3)]

        if episode != 0 and episode % validation_interval == 0:
            self.test(episodes=200)
            np.random.seed()
            t.refresh()

        game = Game.new()
        player_num = random.randint(0, 1)
        if player_num == 0:
            game.reverse()

        x = game.extract_features(players[player_num].player)
        game_step = 0
        while not game.is_over():
            game.next_step(players[player_num], player_num)
            player_num = (player_num + 1) % 2
            x_next = game.extract_features(players[player_num].player)
            V_next = self.get_output(x_next)
            self.sess.run(self.train_op, feed_dict={self.x: x, self.V_next: V_next})
            x = x_next
            game_step += 1

        winner = game.winner()
        _, global_step, summaries, _ = self.sess.run([
            self.train_op, self.global_step, self.summaries_op, self.reset_op
        ], feed_dict={self.x: x, self.V_next: np.array([[winner]], dtype='float')})
        summary_writer.add_summary(summaries, global_step=global_step)
        self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

    summary_writer.close()
def test(self, episodes=100, draw=False):
    players = [TDAgent(Game.TOKENS[0], self), RandomAgent(Game.TOKENS[1])]
    winners = [0, 0]
    for episode in range(episodes):
        game = Game.new()
        winner = game.play(players, draw=draw)
        winners[winner] += 1
        winners_total = sum(winners)
        print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (
            episode,
            players[0].name, players[0].player,
            players[1].name, players[1].player,
            winners[0], winners[1], winners_total,
            (winners[0] / winners_total) * 100.0))
def train(self):
    with tf.device('/gpu:0'):
        tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
        summary_writer = tf.summary.FileWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

        # the agent plays against itself, making the best move for each player
        players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

        validation_interval = 1000
        episodes = 5000

        for episode in range(episodes):
            if episode != 0 and episode % validation_interval == 0:
                self.test(episodes=100)

            game = Game.new()
            player_num = random.randint(0, 1)

            x = game.extract_features(players[player_num].player)
            game_step = 0
            while not game.is_over():
                game.next_step(players[player_num], player_num)
                player_num = (player_num + 1) % 2
                x_next = game.extract_features(players[player_num].player)
                V_next = self.get_output(x_next)
                self.sess.run(self.train_op, feed_dict={self.x: x, self.V_next: V_next})
                x = x_next
                game_step += 1

            winner = game.winner()
            _, global_step, summaries, _ = self.sess.run([
                self.train_op, self.global_step, self.summaries_op, self.reset_op
            ], feed_dict={self.x: x, self.V_next: np.array([[winner]], dtype='float')})
            summary_writer.add_summary(summaries, global_step=global_step)

            print("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))
            self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

        summary_writer.close()
        self.test(episodes=1000)
def train(self):
    tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
    summary_writer = tf.train.SummaryWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

    # the agent plays against itself, making the best move for each player
    players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

    validation_interval = 1000
    episodes = 5000

    for episode in range(episodes):
        if episode != 0 and episode % validation_interval == 0:
            self.test(episodes=100)

        game = Game.new()
        player_num = random.randint(0, 1)

        x = game.extract_features(players[player_num].player)
        game_step = 0
        while not game.is_over():
            game.next_step(players[player_num], player_num)
            player_num = (player_num + 1) % 2
            x_next = game.extract_features(players[player_num].player)
            V_next = self.get_output(x_next)
            self.sess.run(self.train_op, feed_dict={self.x: x, self.V_next: V_next})
            x = x_next
            game_step += 1

        winner = game.winner()
        _, global_step, summaries, _ = self.sess.run([
            self.train_op, self.global_step, self.summaries_op, self.reset_op
        ], feed_dict={self.x: x, self.V_next: np.array([[winner]], dtype='float')})
        summary_writer.add_summary(summaries, global_step=global_step)

        print("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))
        self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

    summary_writer.close()
    self.test(episodes=1000)
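# A minimal, self-contained sketch of the TD update pattern the self-play loops above rely
# on: during a game the training target for the current features x is the network's own
# estimate V(x_next) for the next position, and at the end of the game the target is the
# actual outcome (1 if the first player won, 0 otherwise). The linear-sigmoid model, the
# learning rate and the feature size below are illustrative assumptions, not the repo's network.
import numpy as np

alpha = 0.1
w = np.zeros(4)  # toy value function V(x) = sigmoid(w . x)

def V(x):
    return 1.0 / (1.0 + np.exp(-(w @ x)))

def td_update(x, target):
    global w
    v = V(x)
    # gradient step on 0.5 * (target - v)^2 for a sigmoid output
    w += alpha * (target - v) * v * (1 - v) * x

# within an episode: bootstrap from the next position's estimate
x, x_next = np.array([1.0, 0.0, 1.0, 0.0]), np.array([0.0, 1.0, 1.0, 0.0])
td_update(x, V(x_next))

# at game end: the observed result replaces the bootstrapped estimate
winner = 1
td_update(x_next, float(winner))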
def main():
    #random.seed(42)
    board = Board()
    game = Game(board)
    agent = RandomAgent(board)

    while not game.is_finished():
        dice = game.roll_dice()
        board.print()

        allowed_moves_made = False
        while allowed_moves_made is False:
            human_moves = present_dice_to_human_and_ask_move(dice)
            try:
                allowed_moves_made = game.apply(game.PLAYER1, human_moves)
            except Exception as e:
                print(e)
                allowed_moves_made = False

        dice = game.roll_dice()
        ai_moves = agent.move(dice)
        print("Dice for AI were {}. Resulting moves: {}".format(dice, ", ".join([str(move) for move in ai_moves])))
        game.apply(game.PLAYER2, ai_moves)
def test(self, episodes=100, draw=False, save=None):
    players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]
    winners = [0, 0]
    for episode in range(episodes):
        game = Game.new()
        winner = game.play(players, draw=draw)
        if save:
            game.save_tmg(os.path.join(save, str(episode) + '.tmg'))
        winners[winner] += 1
        winners_total = sum(winners)
        print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (
            episode,
            players[0].name, players[0].player,
            players[1].name, players[1].player,
            winners[0], winners[1], winners_total,
            (winners[0] / winners_total) * 100.0))
def test(self, episodes=100, draw=False, mode=0):
    if mode == 0:
        players = [TDAgent(Game.TOKENS[0], self), Today_bot(Game.TOKENS[1])]
    if mode == 1:
        players = [Today_bot(Game.TOKENS[0]), TDAgent(Game.TOKENS[1], self)]
    if mode == 3:
        players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

    winners = [0, 0]
    for episode in range(episodes):
        np.random.seed(episode)
        game = Game.new()
        winner = game.play(players, draw=draw)
        winners[winner] += 1
        winners_total = sum(winners)
        if mode < 3:
            print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (
                episode,
                players[0].name, players[0].player,
                players[1].name, players[1].player,
                winners[0], winners[1], winners_total,
                (winners[mode] / winners_total) * 100.0))
            if (winners[mode] / winners_total) * 100.0 > self.max_wr:
                # new best win rate: dump the network weights to disk
                self.max_wr = (winners[mode] / winners_total) * 100.0
                w1 = self.l1_W.eval()
                b1 = self.l1_b.eval()
                w2 = self.l2_W.eval()
                b2 = self.l2_b.eval()
                np.savetxt("w1.txt", w1)
                np.savetxt("w2.txt", w2)
                np.savetxt("b1.txt", b1)
                np.savetxt("b2.txt", b2)
                with open("max_wr.txt", "w") as text_file:
                    text_file.write(str(self.max_wr))
        else:
            print("[Episode %d] %s (%s) vs %s (%s) %d:%d of %d games (%.2f%%)" % (
                episode,
                players[0].name, players[0].player,
                players[1].name, players[1].player,
                winners[0], winners[1], winners_total,
                (winners[0] / winners_total) * 100.0))
def play(self):
    game = Game.new()
    game.play([TDAgent(Game.TOKENS[0], self), Today_bot(Game.TOKENS[1])], draw=True)
def run_game_loop(board, agent1, agent2, do_print=False):
    game = Game(board)
    while not game.is_finished():
        agent1.learn()
        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI1 were {}.".format(dice))
        ai_moves = agent1.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join([str(move) for move in ai_moves])))
        game.apply(game.PLAYER1, ai_moves)
        if game.is_finished():
            break

        agent2.learn()
        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI2 were {}.".format(dice))
        ai_moves = agent2.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join([str(move) for move in ai_moves])))
        game.apply(game.PLAYER2, ai_moves)

    winner = game.get_winner()
    if winner > 0:
        # player 1 won: each agent learns the outcome from its own perspective
        agent1.learn(np.array([1.0, 0.0]))
        agent2.learn(np.array([0.0, 1.0]))
    else:
        agent1.learn(np.array([0.0, 1.0]))
        agent2.learn(np.array([1.0, 0.0]))
    return winner, game.get_num_moves()
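# Hedged sketch of the terminal learning signal run_game_loop() hands to the agents: a
# two-element vector read from each agent's own perspective, so the winner trains toward
# [1, 0] and the loser toward [0, 1]. The helper below only illustrates that encoding; it
# is not part of the project.
import numpy as np

WIN = np.array([1.0, 0.0])
LOSS = np.array([0.0, 1.0])

def terminal_targets(player1_won):
    """Return (target_for_agent1, target_for_agent2) at the end of a game."""
    return (WIN, LOSS) if player1_won else (LOSS, WIN)

print(terminal_targets(True))  # (array([1., 0.]), array([0., 1.]))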
def move(self, dice):
    self.last_my_fields_before_move = copy.copy(self.my_fields)
    self.last_other_fields_before_move = copy.copy(self.other_fields)

    if dice[0] == dice[1]:
        # doubles: the same die value may be played up to four times
        final_moves = []
        my_fields_after = copy.copy(self.my_fields)
        other_fields_after = copy.copy(self.other_fields)
        for _ in range(4):
            moves = self.generate_possible_moves(dice[0], my_fields_after, other_fields_after)
            if len(moves) == 0:
                return final_moves
            used_die, best_move = self.evaluate_moves_by_mlp(moves, my_fields_after, other_fields_after)
            final_moves.append(best_move)
            try:
                Game.apply_move(my_fields_after, other_fields_after, best_move)
            except Exception as e:
                self.print_intermediate_board(my_fields_after, other_fields_after)
                raise e
        return final_moves
    else:
        final_moves = []
        moves = []
        moves.extend(self.generate_possible_moves(dice[0], self.my_fields, self.other_fields))
        moves.extend(self.generate_possible_moves(dice[1], self.my_fields, self.other_fields))
        if len(moves) == 0:
            return final_moves

        used_die, best_move = self.evaluate_moves_by_mlp(moves, self.my_fields, self.other_fields)
        final_moves.append(best_move)

        my_fields_after = copy.copy(self.my_fields)
        other_fields_after = copy.copy(self.other_fields)
        Game.apply_move(my_fields_after, other_fields_after, best_move)

        # play the remaining die from the position reached after the first move
        other_die = dice[0]
        if dice[0] == used_die:
            other_die = dice[1]
        moves = self.generate_possible_moves(other_die, my_fields_after, other_fields_after)
        if len(moves) > 0:
            used_die, best_move = self.evaluate_moves_by_mlp(moves, my_fields_after, other_fields_after)
            final_moves.append(best_move)
        return final_moves
    pos[25] = len(game.bar_pieces['x'])
    pos[26] = len(game.off_pieces['x'])
    pos[27] = -len(game.off_pieces['o'])
    for i, tup in enumerate(game.grid):
        if len(tup) == 0:
            pos[i + 1] = 0
        elif tup[0] == 'x':
            pos[i + 1] = len(tup)
        else:
            pos[i + 1] = -len(tup)
    return pos


if __name__ == '__main__':
    print pubeval(False, [0] +
                  [-2, 0, 0, 0, 0, 5] +
                  [0, 3, 0, 0, 0, -5] +
                  [5, 0, 0, 0, -3, 0] +
                  [-5, 0, 0, 0, 0, 2] +
                  [0] +
                  [0, 0])

    from backgammon.game import Game
    g = Game.new()
    print pubeval(False, game_to_pos(g))

    actions = g.get_actions((5, 6), 'x', nodups=True)
    for a in sorted([str(foo) for foo in actions]):
        print a
    print

    actions = g.get_actions((5, 6), 'o', nodups=True)
    for a in sorted([str(foo) for foo in actions]):
        print a
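# Hedged sanity check for the 28-slot pubeval position vector built above: pos[1..24] hold
# signed point counts ('x' positive, 'o' negative), pos[25]/pos[26] the 'x' bar and
# borne-off counts, pos[27] the 'o' borne-off count as a negative number, and pos[0] (not
# set in the excerpt) is assumed to hold the 'o' bar count as a non-positive number. Each
# side should then account for 15 checkers. check_pos() is a hypothetical helper, not part
# of the project.
def check_pos(pos):
    x_total = sum(v for v in pos[1:25] if v > 0) + pos[25] + pos[26]
    o_total = -(sum(v for v in pos[1:25] if v < 0) + pos[0] + pos[27])
    assert x_total == 15 and o_total == 15, (x_total, o_total)

check_pos([0] +
          [-2, 0, 0, 0, 0, 5] +
          [0, 3, 0, 0, 0, -5] +
          [5, 0, 0, 0, -3, 0] +
          [-5, 0, 0, 0, 0, 2] +
          [0] + [0, 0])  # the starting position used in the __main__ demo above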
from backgammon.game import Game, Board

# For testing purposes
g = Game()
g.initialize()
print(g)
g.play_(1, 0, 9)
print(g)
g.play_(-1, 7, 9)
print(g)
g.play_(-1, 7, 5)
print(g)
# g.play(1, 0, 11)
# print(g)
g.play_(-1, 5, 7)
print(g)
def run_game_loop(board, agent1, agent2, do_print=False):
    game = Game(board)
    while not game.is_finished():
        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI1 were {}.".format(dice))
        ai_moves = agent1.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join([str(move) for move in ai_moves])))
        game.apply(game.PLAYER1, ai_moves)
        if game.is_finished():
            break

        dice = game.roll_dice()
        if do_print:
            board.print()
            print("Dice for AI2 were {}.".format(dice))
        ai_moves = agent2.move(dice)
        if do_print:
            print("Resulting moves: {}.".format(", ".join([str(move) for move in ai_moves])))
        game.apply(game.PLAYER2, ai_moves)

    return game.get_winner()
def play(self):
    game = Game.new()
    game.play([TDAgent(Game.TOKENS[0], self), HumanAgent(Game.TOKENS[1])], draw=True)
def train(self, episodes=5000):
    tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
    summary_writer = tf.train.SummaryWriter('{0}{1}'.format(self.summary_path, int(time.time())), graph_def=self.sess.graph_def)

    # the agent plays against itself, making the best move for each player
    players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

    validation_interval = 1000
    report_freq = 10
    prev_time = time.time()
    prev_step = self.sess.run(self.global_step)
    plies_per_batch = 0

    for episode in range(episodes):
        if episode != 0 and episode % validation_interval == 0:
            self.test(episodes=100)

        game = Game.new()
        player_num = random.randint(0, 1)

        x = game.extract_features(players[player_num].player)
        game_step = 0
        while not game.is_over():
            game.next_step(players[player_num], player_num)
            player_num = (player_num + 1) % 2
            x_next = game.extract_features(players[player_num].player)
            V_next = self.get_output(x_next)
            self.sess.run(self.train_op, feed_dict={self.x: x, self.V_next: V_next})
            x = x_next
            game_step += 1

        winner = game.winner()
        _, global_step, summaries, _ = self.sess.run([
            self.train_op, self.global_step, self.summaries_op, self.reset_op
        ], feed_dict={self.x: x, self.V_next: np.array([[winner]], dtype='float')})
        print("Game %d/%d (Winner: %s) in %d turns" % (episode, episodes, players[winner].player, game_step))

        plies_per_batch += game_step
        if episode != 0 and episode % report_freq == 0:
            # periodic throughput report and checkpoint
            now = time.time()
            elapsed_time = now - prev_time
            steps_per_sec = (global_step - prev_step) / elapsed_time
            games_per_sec = report_freq / elapsed_time
            plies_per_game = plies_per_batch / report_freq
            print('e=%.2f sps=%.2f gps=%.2f ppg=%.1f global=%d prev=%d' %
                  (elapsed_time, steps_per_sec, games_per_sec, plies_per_game, global_step, prev_step))

            summary_writer.add_summary(summaries, global_step=global_step)
            s1 = tf.Summary(value=[tf.Summary.Value(tag='rate/global_steps_sec', simple_value=steps_per_sec)])
            summary_writer.add_summary(s1, global_step)
            s2 = tf.Summary(value=[tf.Summary.Value(tag='rate/games_sec', simple_value=games_per_sec)])
            summary_writer.add_summary(s2, global_step)
            s3 = tf.Summary(value=[tf.Summary.Value(tag='rate/plies_per_game', simple_value=plies_per_game)])
            summary_writer.add_summary(s3, global_step)

            self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

            prev_time = now
            prev_step = global_step
            plies_per_batch = 0

    summary_writer.close()
    self.test(episodes=1000)
def random_selfplay(self):
    players = [RandomAgent(Game.TOKENS[0]), RandomAgent(Game.TOKENS[1])]
    game = Game.new()
    game.SLEEP = 0
    winner = game.play(players, draw=True)
def play(self, ts=False):
    game = Game.new()
    game.play([TDAgent(Game.TOKENS[0], self), HumanAgent(Game.TOKENS[1])], draw=True, ts=ts)
def train(self):
    tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_gammon.pb', as_text=False)
    summary_writer = tf.summary.FileWriter('{0}{1}'.format(self.summary_path, int(time.time())), self.sess.graph_def)

    # the agent plays against itself, making the best move for each player
    players = [TDAgent(Game.TOKENS[0], self), TDAgent(Game.TOKENS[1], self)]

    validation_interval = 500
    episodes = 5000

    for episode in range(episodes):
        start_ts = time.time()
        if episode != 0 and episode % validation_interval == 0:
            print('Episode:', episode)
            write('Episode: %d' % episode)
            self.test(episodes=100)

        game = Game.new()
        player_num = random.randint(0, 1)

        x = game.extract_features(players[player_num].player)
        game_step = 0
        while not game.is_over():
            game.next_step(players[player_num], player_num)
            player_num = (player_num + 1) % 2
            x_next = game.extract_features(players[player_num].player)
            V_next = self.get_output(x_next)
            self.sess.run(self.train_op, feed_dict={self.x: x, self.V_next: V_next})
            x = x_next
            game_step += 1

        winner = game.winner()
        _, global_step, summaries, _ = self.sess.run([
            self.train_op, self.global_step, self.summaries_op, self.reset_op,
        ], feed_dict={self.x: x, self.V_next: np.array([[winner]], dtype='float')})
        summary_writer.add_summary(summaries, global_step=global_step)

        end_ts = time.time()
        print("%.2f - Game %d/%d (Winner: %s) in %d turns (%.2f secs)" %
              (self.k, episode, episodes, players[winner].player, game_step, end_ts - start_ts))
        self.saver.save(self.sess, self.checkpoint_path + 'checkpoint', global_step=global_step)

    summary_writer.close()
    write('Episode: 5000')
    self.test(episodes=100)